http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp new file mode 100644 index 0000000..c9dec88 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp @@ -0,0 +1,1303 @@ +#ifndef VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_ +#define VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/matrix_operations.hpp + @brief Implementations of dense matrix related operations including matrix-vector products. +*/ + +#include "viennacl/forwards.h" +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/vector_proxy.hpp" +#include "viennacl/tools/tools.hpp" +#include "viennacl/meta/enable_if.hpp" +#include "viennacl/meta/predicate.hpp" +#include "viennacl/meta/result_of.hpp" +#include "viennacl/traits/size.hpp" +#include "viennacl/traits/start.hpp" +#include "viennacl/traits/handle.hpp" +#include "viennacl/traits/stride.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/linalg/host_based/matrix_operations.hpp" + +#ifdef VIENNACL_WITH_OPENCL + #include "viennacl/linalg/opencl/matrix_operations.hpp" +#endif + +#ifdef VIENNACL_WITH_CUDA + #include "viennacl/linalg/cuda/matrix_operations.hpp" +#endif + +namespace viennacl +{ + namespace linalg + { + + template<typename DestNumericT, typename SrcNumericT> + void convert(matrix_base<DestNumericT> & dest, matrix_base<SrcNumericT> const & src) + { + assert(viennacl::traits::size1(dest) == viennacl::traits::size1(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size1(m1) != size1(m2)")); + assert(viennacl::traits::size2(dest) == viennacl::traits::size2(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size2(m1) != size2(m2)")); + + switch (viennacl::traits::handle(dest).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::convert(dest, src); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::convert(dest, src); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::convert(dest, src); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + template<typename NumericT, + typename SizeT, typename DistanceT> + void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy, + matrix_base<NumericT> & temp_trans) + { + switch (viennacl::traits::handle(proxy).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::trans(proxy, temp_trans); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::trans(proxy,temp_trans); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::trans(proxy,temp_trans); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + template<typename NumericT, + typename ScalarType1> + void am(matrix_base<NumericT> & mat1, + matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) + { + switch (viennacl::traits::handle(mat1).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + template<typename NumericT, + typename ScalarType1, typename ScalarType2> + void ambm(matrix_base<NumericT> & mat1, + matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + matrix_base<NumericT> const & mat3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) + { + switch (viennacl::traits::handle(mat1).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::ambm(mat1, + mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::ambm(mat1, + mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::ambm(mat1, + mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + template<typename NumericT, + typename ScalarType1, typename ScalarType2> + void ambm_m(matrix_base<NumericT> & mat1, + matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + matrix_base<NumericT> const & mat3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) + { + switch (viennacl::traits::handle(mat1).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::ambm_m(mat1, + mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::ambm_m(mat1, + mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::ambm_m(mat1, + mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + template<typename NumericT> + void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false) + { + switch (viennacl::traits::handle(mat).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::matrix_assign(mat, s, clear); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::matrix_assign(mat, s, clear); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::matrix_assign(mat, s, clear); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + template<typename NumericT> + void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s) + { + switch (viennacl::traits::handle(mat).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::matrix_diagonal_assign(mat, s); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::matrix_diagonal_assign(mat, s); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::matrix_diagonal_assign(mat, s); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + /** @brief Dispatcher interface for A = diag(v, k) */ + template<typename NumericT> + void matrix_diag_from_vector(const vector_base<NumericT> & v, int k, matrix_base<NumericT> & A) + { + switch (viennacl::traits::handle(v).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::matrix_diag_from_vector(v, k, A); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::matrix_diag_from_vector(v, k, A); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::matrix_diag_from_vector(v, k, A); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + /** @brief Dispatcher interface for v = diag(A, k) */ + template<typename NumericT> + void matrix_diag_to_vector(const matrix_base<NumericT> & A, int k, vector_base<NumericT> & v) + { + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::matrix_diag_to_vector(A, k, v); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::matrix_diag_to_vector(A, k, v); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::matrix_diag_to_vector(A, k, v); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + template<typename NumericT> + void matrix_row(const matrix_base<NumericT> & A, unsigned int i, vector_base<NumericT> & v) + { + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::matrix_row(A, i, v); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::matrix_row(A, i, v); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::matrix_row(A, i, v); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + template<typename NumericT> + void matrix_column(const matrix_base<NumericT> & A, unsigned int j, vector_base<NumericT> & v) + { + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::matrix_column(A, j, v); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::matrix_column(A, j, v); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::matrix_column(A, j, v); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + /** @brief Computes the Frobenius norm of a matrix - dispatcher interface + * + * @param A The matrix + * @param result The result scalar + * + * Note that if A is strided or off-set, then a copy will be created. + */ + template<typename T> + void norm_frobenius_impl(matrix_base<T> const & A, + scalar<T> & result) + { + typedef typename matrix_base<T>::handle_type HandleType; + + if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) { + if (A.row_major()) { + viennacl::matrix<T, viennacl::row_major> temp_A(A); + viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1); + norm_2_impl(temp, result); + } else { + viennacl::matrix<T, viennacl::column_major> temp_A(A); + viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1); + norm_2_impl(temp, result); + } + } else { + viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1); + norm_2_impl(temp, result); + } + + } + + /** @brief Computes the Frobenius norm of a vector with final reduction on the CPU + * + * @param A The matrix + * @param result The result scalar + * + * Note that if A is strided or off-set, then a copy will be created. + */ + template<typename T> + void norm_frobenius_cpu(matrix_base<T> const & A, + T & result) + { + typedef typename matrix_base<T>::handle_type HandleType; + + if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) { + if (A.row_major()) { + viennacl::matrix<T, viennacl::row_major> temp_A(A); + viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1); + norm_2_cpu(temp, result); + } else { + viennacl::matrix<T, viennacl::column_major> temp_A(A); + viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1); + norm_2_cpu(temp, result); + } + } else { + viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1); + norm_2_cpu(temp, result); + } + + } + + // + ///////////////////////// matrix-vector products ///////////////////////////////// + // + + + + // A * x + + /** @brief Carries out matrix-vector multiplication + * + * Implementation of the convenience expression result = prod(mat, vec); + * + * @param mat The matrix + * @param vec The vector + * @param result The result vector + */ + template<typename NumericT> + void prod_impl(const matrix_base<NumericT> & mat, + const vector_base<NumericT> & vec, + vector_base<NumericT> & result) + { + assert( (viennacl::traits::size1(mat) == viennacl::traits::size(result)) && bool("Size check failed at v1 = prod(A, v2): size1(A) != size(v1)")); + assert( (viennacl::traits::size2(mat) == viennacl::traits::size(vec)) && bool("Size check failed at v1 = prod(A, v2): size2(A) != size(v2)")); + + switch (viennacl::traits::handle(mat).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::prod_impl(mat, false, vec, result); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::prod_impl(mat, false, vec, result); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::prod_impl(mat, false, vec, result); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + // trans(A) * x + + /** @brief Carries out matrix-vector multiplication with a transposed matrix + * + * Implementation of the convenience expression result = trans(mat) * vec; + * + * @param mat_trans The transposed matrix proxy + * @param vec The vector + * @param result The result vector + */ + template<typename NumericT> + void prod_impl(const matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & mat_trans, + const vector_base<NumericT> & vec, + vector_base<NumericT> & result) + { + assert( (viennacl::traits::size1(mat_trans.lhs()) == viennacl::traits::size(vec)) && bool("Size check failed at v1 = trans(A) * v2: size1(A) != size(v2)")); + assert( (viennacl::traits::size2(mat_trans.lhs()) == viennacl::traits::size(result)) && bool("Size check failed at v1 = trans(A) * v2: size2(A) != size(v1)")); + + switch (viennacl::traits::handle(mat_trans.lhs()).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::prod_impl(mat_trans.lhs(), true, vec, result); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::prod_impl(mat_trans.lhs(), true, vec, result); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::prod_impl(mat_trans.lhs(), true, vec, result); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + // + ///////////////////////// matrix-matrix products ///////////////////////////////// + // + + /** @brief Carries out matrix-matrix multiplication + * + * Implementation of C = prod(A, B); + * + */ + template<typename NumericT, typename ScalarType > + void prod_impl(const matrix_base<NumericT> & A, + const matrix_base<NumericT> & B, + matrix_base<NumericT> & C, + ScalarType alpha, + ScalarType beta) + { + assert( (viennacl::traits::size1(A) == viennacl::traits::size1(C)) && bool("Size check failed at C = prod(A, B): size1(A) != size1(C)")); + assert( (viennacl::traits::size2(A) == viennacl::traits::size1(B)) && bool("Size check failed at C = prod(A, B): size2(A) != size1(B)")); + assert( (viennacl::traits::size2(B) == viennacl::traits::size2(C)) && bool("Size check failed at C = prod(A, B): size2(B) != size2(C)")); + + + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::prod_impl(A, false, B, false, C, alpha, beta); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::prod_impl(A, false, B, false, C, alpha, beta); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::prod_impl(A, false, B, false, C, alpha, beta); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + + /** @brief Carries out matrix-matrix multiplication + * + * Implementation of C = prod(trans(A), B); + * + */ + template<typename NumericT, typename ScalarType > + void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>, + const matrix_base<NumericT>, + op_trans> & A, + const matrix_base<NumericT> & B, + matrix_base<NumericT> & C, + ScalarType alpha, + ScalarType beta) + { + assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(trans(A), B): size2(A) != size1(C)")); + assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size1(B) && bool("Size check failed at C = prod(trans(A), B): size1(A) != size1(B)")); + assert(viennacl::traits::size2(B) == viennacl::traits::size2(C) && bool("Size check failed at C = prod(trans(A), B): size2(B) != size2(C)")); + + switch (viennacl::traits::handle(A.lhs()).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::prod_impl(A.lhs(), true, B, false, C, alpha, beta); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::prod_impl(A.lhs(), true, B, false, C, alpha, beta); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::prod_impl(A.lhs(), true, B, false, C, alpha, beta); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + + + /** @brief Carries out matrix-matrix multiplication + * + * Implementation of C = prod(A, trans(B)); + * + */ + template<typename NumericT, typename ScalarType > + void prod_impl(const matrix_base<NumericT> & A, + const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B, + matrix_base<NumericT> & C, + ScalarType alpha, + ScalarType beta) + { + assert(viennacl::traits::size1(A) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(A, trans(B)): size1(A) != size1(C)")); + assert(viennacl::traits::size2(A) == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(A, trans(B)): size2(A) != size2(B)")); + assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C) && bool("Size check failed at C = prod(A, trans(B)): size1(B) != size2(C)")); + + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::prod_impl(A, false, B.lhs(), true, C, alpha, beta); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::prod_impl(A, false, B.lhs(), true, C, alpha, beta); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::prod_impl(A, false, B.lhs(), true, C, alpha, beta); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + + /** @brief Carries out matrix-matrix multiplication + * + * Implementation of C = prod(trans(A), trans(B)); + * + */ + template<typename NumericT, typename ScalarType > + void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & A, + const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B, + matrix_base<NumericT> & C, + ScalarType alpha, + ScalarType beta) + { + assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(trans(A), trans(B)): size2(A) != size1(C)")); + assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(trans(A), trans(B)): size1(A) != size2(B)")); + assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C) && bool("Size check failed at C = prod(trans(A), trans(B)): size1(B) != size2(C)")); + + switch (viennacl::traits::handle(A.lhs()).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + ///////////////////////// summation operations ///////////// + + template<typename NumericT> + void row_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result) + { + viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size2(), NumericT(1), viennacl::traits::context(A)); + viennacl::linalg::prod_impl(A, all_ones, result); + } + + template<typename NumericT> + void column_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result) + { + viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size1(), NumericT(1), viennacl::traits::context(A)); + viennacl::linalg::prod_impl(matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>(A, A), all_ones, result); + } + + ///////////////////////// Elementwise operations ///////////// + + + + /** @brief Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syntax). Don't use this function directly, use element_prod() and element_div(). + * + * @param A The result matrix (or -range, or -slice) + * @param proxy The proxy object holding B, C, and the operation + */ + template<typename T, typename OP> + void element_op(matrix_base<T> & A, + matrix_expression<const matrix_base<T>, const matrix_base<T>, OP> const & proxy) + { + assert( (viennacl::traits::size1(A) == viennacl::traits::size1(proxy)) && bool("Size check failed at A = element_op(B): size1(A) != size1(B)")); + assert( (viennacl::traits::size2(A) == viennacl::traits::size2(proxy)) && bool("Size check failed at A = element_op(B): size2(A) != size2(B)")); + + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::element_op(A, proxy); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::element_op(A, proxy); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::element_op(A, proxy); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + +#define VIENNACL_MAKE_BINARY_OP(OPNAME)\ + template<typename T>\ + viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >\ + element_##OPNAME(matrix_base<T> const & A, matrix_base<T> const & B)\ + {\ + return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >(A, B);\ + }\ +\ + template<typename M1, typename M2, typename OP, typename T>\ + viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\ + const matrix_base<T>,\ + op_element_binary<op_##OPNAME> >\ + element_##OPNAME(matrix_expression<const M1, const M2, OP> const & proxy, matrix_base<T> const & B)\ + {\ + return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\ + const matrix_base<T>,\ + op_element_binary<op_##OPNAME> >(proxy, B);\ + }\ +\ + template<typename T, typename M2, typename M3, typename OP>\ + viennacl::matrix_expression<const matrix_base<T>,\ + const matrix_expression<const M2, const M3, OP>,\ + op_element_binary<op_##OPNAME> >\ + element_##OPNAME(matrix_base<T> const & A, matrix_expression<const M2, const M3, OP> const & proxy)\ + {\ + return viennacl::matrix_expression<const matrix_base<T>,\ + const matrix_expression<const M2, const M3, OP>,\ + op_element_binary<op_##OPNAME> >(A, proxy);\ + }\ +\ + template<typename M1, typename M2, typename OP1,\ + typename M3, typename M4, typename OP2>\ + viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\ + const matrix_expression<const M3, const M4, OP2>,\ + op_element_binary<op_##OPNAME> >\ + element_##OPNAME(matrix_expression<const M1, const M2, OP1> const & proxy1,\ + matrix_expression<const M3, const M4, OP2> const & proxy2)\ + {\ + return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\ + const matrix_expression<const M3, const M4, OP2>,\ + op_element_binary<op_##OPNAME> >(proxy1, proxy2);\ + } + + VIENNACL_MAKE_BINARY_OP(prod) + VIENNACL_MAKE_BINARY_OP(div) + VIENNACL_MAKE_BINARY_OP(pow) + + VIENNACL_MAKE_BINARY_OP(eq) + VIENNACL_MAKE_BINARY_OP(neq) + VIENNACL_MAKE_BINARY_OP(greater) + VIENNACL_MAKE_BINARY_OP(less) + VIENNACL_MAKE_BINARY_OP(geq) + VIENNACL_MAKE_BINARY_OP(leq) + +#undef VIENNACL_GENERATE_BINARY_OP_OVERLOADS + + + +#define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \ + template<typename T> \ + viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> > \ + element_##funcname(matrix_base<T> const & A) \ + { \ + return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> >(A, A); \ + } \ + template<typename LHS, typename RHS, typename OP> \ + viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \ + const matrix_expression<const LHS, const RHS, OP>, \ + op_element_unary<op_##funcname> > \ + element_##funcname(matrix_expression<const LHS, const RHS, OP> const & proxy) \ + { \ + return viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \ + const matrix_expression<const LHS, const RHS, OP>, \ + op_element_unary<op_##funcname> >(proxy, proxy); \ + } \ + + VIENNACL_MAKE_UNARY_ELEMENT_OP(abs) + VIENNACL_MAKE_UNARY_ELEMENT_OP(acos) + VIENNACL_MAKE_UNARY_ELEMENT_OP(asin) + VIENNACL_MAKE_UNARY_ELEMENT_OP(atan) + VIENNACL_MAKE_UNARY_ELEMENT_OP(ceil) + VIENNACL_MAKE_UNARY_ELEMENT_OP(cos) + VIENNACL_MAKE_UNARY_ELEMENT_OP(cosh) + VIENNACL_MAKE_UNARY_ELEMENT_OP(exp) + VIENNACL_MAKE_UNARY_ELEMENT_OP(fabs) + VIENNACL_MAKE_UNARY_ELEMENT_OP(floor) + VIENNACL_MAKE_UNARY_ELEMENT_OP(log) + VIENNACL_MAKE_UNARY_ELEMENT_OP(log10) + VIENNACL_MAKE_UNARY_ELEMENT_OP(sin) + VIENNACL_MAKE_UNARY_ELEMENT_OP(sinh) + VIENNACL_MAKE_UNARY_ELEMENT_OP(sqrt) + VIENNACL_MAKE_UNARY_ELEMENT_OP(tan) + VIENNACL_MAKE_UNARY_ELEMENT_OP(tanh) + +#undef VIENNACL_MAKE_UNARY_ELEMENT_OP + + + // + ///////////////////////// miscellaneous operations ///////////////////////////////// + // + + + /** @brief Returns a proxy class for the operation mat += vec1 * vec2^T, i.e. a rank 1 update + * + * @param vec1 The first vector + * @param vec2 The second vector + */ + template<typename NumericT> + viennacl::matrix_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_prod> + outer_prod(const vector_base<NumericT> & vec1, const vector_base<NumericT> & vec2) + { + return viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>(vec1, vec2); + } + + + /** @brief The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update + * + * Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2); + * + * @param mat1 The matrix to be updated + * @param alpha The scaling factor (either a viennacl::scalar<>, float, or double) + * @param len_alpha Length of the buffer for an eventual final reduction step (currently always '1') + * @param reciprocal_alpha Use 1/alpha instead of alpha + * @param flip_sign_alpha Use -alpha instead of alpha + * @param vec1 The first vector + * @param vec2 The second vector + */ + template<typename NumericT, typename S1> + void scaled_rank_1_update(matrix_base<NumericT> & mat1, + S1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + const vector_base<NumericT> & vec1, + const vector_base<NumericT> & vec2) + { + switch (viennacl::traits::handle(mat1).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::scaled_rank_1_update(mat1, + alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + vec1, vec2); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::scaled_rank_1_update(mat1, + alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + vec1, vec2); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::scaled_rank_1_update(mat1, + alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, + vec1, vec2); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + /** @brief This function stores the diagonal and the superdiagonal of a matrix in two vectors. + * + * + * @param A The matrix from which the vectors will be extracted of. + * @param dh The vector in which the diagonal of the matrix will be stored in. + * @param sh The vector in which the superdiagonal of the matrix will be stored in. + */ + + template <typename NumericT, typename VectorType> + void bidiag_pack(matrix_base<NumericT> & A, + VectorType & dh, + VectorType & sh + ) + { + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::bidiag_pack(A, dh, sh); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::bidiag_pack(A, dh, sh); + break; +#endif + +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::bidiag_pack(A, dh, sh); + break; +#endif + + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + + + } + /** @brief This function copies a row or a column from a matrix to a vector. + * + * + * @param A The matrix where to copy from. + * @param V The vector to fill with data. + * @param row_start The number of the first row to copy. + * @param col_start The number of the first column to copy. + * @param copy_col Set to TRUE to copy a column, FALSE to copy a row. + */ + + template <typename SCALARTYPE> + void copy_vec(matrix_base<SCALARTYPE>& A, + vector_base<SCALARTYPE>& V, + vcl_size_t row_start, + vcl_size_t col_start, + bool copy_col + ) + { + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::copy_vec(A, V, row_start, col_start, copy_col); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::copy_vec(A, V, row_start, col_start, copy_col); + break; +#endif + +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::copy_vec(A, V, row_start, col_start, copy_col); + break; +#endif + + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + + } + + /** @brief This function applies a householder transformation to a matrix. A <- P * A with a householder reflection P + * + * @param A The matrix to be updated. + * @param D The normalized householder vector. + * @param start The repetition counter. + */ + template <typename NumericT> + void house_update_A_left(matrix_base<NumericT> & A, + vector_base<NumericT> & D, + vcl_size_t start) + { + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::house_update_A_left(A, D, start); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::house_update_A_left(A, D, start); + break; +#endif + +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::house_update_A_left(A, D, start); + break; +#endif + + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + /** @brief This function applies a householder transformation to a matrix: A <- A * P with a householder reflection P + * + * + * @param A The matrix to be updated. + * @param D The normalized householder vector. + */ + + template <typename NumericT> + void house_update_A_right(matrix_base<NumericT>& A, + vector_base<NumericT> & D) + { + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::house_update_A_right(A, D); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::house_update_A_right(A, D); + break; +#endif + +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::house_update_A_right(A, D); + break; +#endif + + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + /** @brief This function updates the matrix Q, which is needed for the computation of the eigenvectors. + * + * @param Q The matrix to be updated. + * @param D The householder vector. + * @param A_size1 size1 of matrix A + */ + + template <typename NumericT> + void house_update_QL(matrix_base<NumericT> & Q, + vector_base<NumericT> & D, + vcl_size_t A_size1) + { + switch (viennacl::traits::handle(Q).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::house_update_QL(Q, D, A_size1); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::house_update_QL(Q, D, A_size1); + break; +#endif + +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::house_update_QL(Q, D, A_size1); + break; +#endif + + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + /** @brief This function updates the matrix Q. It is part of the tql2 algorithm. + * + * + * @param Q The matrix to be updated. + * @param tmp1 Vector with data from the tql2 algorithm. + * @param tmp2 Vector with data from the tql2 algorithm. + * @param l Data from the tql2 algorithm. + * @param m Data from the tql2 algorithm. + */ + template<typename NumericT> + void givens_next(matrix_base<NumericT> & Q, + vector_base<NumericT> & tmp1, + vector_base<NumericT> & tmp2, + int l, + int m + ) + { + switch (viennacl::traits::handle(Q).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::givens_next(Q, tmp1, tmp2, l, m); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::givens_next(Q, tmp1, tmp2, l, m); + break; +#endif + +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::givens_next(Q, tmp1, tmp2, l, m); + break; +#endif + + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + } //namespace linalg + + + + + // + ///////////////////////// Operator overloads ///////////////////////////////// + // + + + //v += A * x + /** @brief Implementation of the operation v1 += A * v2, where A is a matrix + * + * @param v1 The result vector v1 where A * v2 is added to + * @param proxy An expression template proxy class. + */ + template<typename NumericT> + vector<NumericT> + operator+=(vector_base<NumericT> & v1, + const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy) + { + assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 += A * v2: size1(A) != size(v1)")); + + vector<NumericT> result(viennacl::traits::size1(proxy.lhs())); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + v1 += result; + return v1; + } + + /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix + * + * @param v1 The result vector v1 where A * v2 is subtracted from + * @param proxy An expression template proxy class. + */ + template<typename NumericT> + vector<NumericT> + operator-=(vector_base<NumericT> & v1, + const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy) + { + assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 -= A * v2: size1(A) != size(v1)")); + + vector<NumericT> result(viennacl::traits::size1(proxy.lhs())); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + v1 -= result; + return v1; + } + + + + + + //free functions: + /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix + * + * @param v1 The addend vector. + * @param proxy An expression template proxy class. + */ + template<typename NumericT> + viennacl::vector<NumericT> + operator+(const vector_base<NumericT> & v1, + const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy) + { + assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 + A * v2: size1(A) != size(v1)")); + + vector<NumericT> result(viennacl::traits::size(v1)); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + result += v1; + return result; + } + + /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix + * + * @param v1 The addend vector. + * @param proxy An expression template proxy class. + */ + template<typename NumericT> + viennacl::vector<NumericT> + operator-(const vector_base<NumericT> & v1, + const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy) + { + assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 - A * v2: size1(A) != size(v1)")); + + vector<NumericT> result(viennacl::traits::size(v1)); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + result = v1 - result; + return result; + } + + + ////////// transposed_matrix_proxy + + + //v += A^T * x + /** @brief Implementation of the operation v1 += A * v2, where A is a matrix + * + * @param v1 The addend vector where the result is written to. + * @param proxy An expression template proxy class. + */ + template<typename NumericT> + vector<NumericT> + operator+=(vector_base<NumericT> & v1, + const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>, + const vector_base<NumericT>, + op_prod> & proxy) + { + assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)")); + + vector<NumericT> result(viennacl::traits::size2(proxy.lhs())); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + v1 += result; + return v1; + } + + //v -= A^T * x + /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix + * + * @param v1 The addend vector where the result is written to. + * @param proxy An expression template proxy class. + */ + template<typename NumericT> + vector<NumericT> + operator-=(vector_base<NumericT> & v1, + const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>, + const vector_base<NumericT>, + op_prod> & proxy) + { + assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)")); + + vector<NumericT> result(viennacl::traits::size2(proxy.lhs())); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + v1 -= result; + return v1; + } + + + //free functions: + /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix + * + * @param v1 The addend vector. + * @param proxy An expression template proxy class. + */ + template<typename NumericT> + vector<NumericT> + operator+(const vector_base<NumericT> & v1, + const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>, + const vector_base<NumericT>, + op_prod> & proxy) + { + assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 + trans(A) * v2: size2(A) != size(v1)")); + + vector<NumericT> result(viennacl::traits::size(v1)); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + result += v1; + return result; + } + + /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix + * + * @param v1 The addend vector. + * @param proxy An expression template proxy class. + */ + template<typename NumericT> + vector<NumericT> + operator-(const vector_base<NumericT> & v1, + const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>, + const vector_base<NumericT>, + op_prod> & proxy) + { + assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 - trans(A) * v2: size2(A) != size(v1)")); + + vector<NumericT> result(viennacl::traits::size(v1)); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + result = v1 - result; + return result; + } + + +} //namespace viennacl + + +#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp new file mode 100644 index 0000000..9269598 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp @@ -0,0 +1,152 @@ +#ifndef VIENNACL_LINALG_MAXMIN_HPP_ +#define VIENNACL_LINALG_MAXMIN_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file norm_inf.hpp + @brief Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations. +*/ + +#include <cmath> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/meta/enable_if.hpp" +#include "viennacl/meta/tag_of.hpp" +#include "viennacl/meta/result_of.hpp" + +namespace viennacl +{ + // + // generic norm_inf function + // uses tag dispatch to identify which algorithm + // should be called + // + namespace linalg + { + + + // ---------------------------------------------------- + // STL + // + template< typename NumericT > + NumericT max(std::vector<NumericT> const & v1) + { + //std::cout << "stl .. " << std::endl; + NumericT result = v1[0]; + for (vcl_size_t i=1; i<v1.size(); ++i) + { + if (v1[i] > result) + result = v1[i]; + } + + return result; + } + + // ---------------------------------------------------- + // VIENNACL + // + template< typename ScalarType> + viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_max > + max(viennacl::vector_base<ScalarType> const & v1) + { + //std::cout << "viennacl .. " << std::endl; + return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_max >(v1, v1); + } + + // with vector expression: + template<typename LHS, typename RHS, typename OP> + viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>, + const viennacl::vector_expression<const LHS, const RHS, OP>, + viennacl::op_max> + max(viennacl::vector_expression<const LHS, const RHS, OP> const & vector) + { + return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>, + const viennacl::vector_expression<const LHS, const RHS, OP>, + viennacl::op_max >(vector, vector); + } + + // ---------------------------------------------------- + // STL + // + template< typename NumericT > + NumericT min(std::vector<NumericT> const & v1) + { + //std::cout << "stl .. " << std::endl; + NumericT result = v1[0]; + for (vcl_size_t i=1; i<v1.size(); ++i) + { + if (v1[i] < result) + result = v1[i]; + } + + return result; + } + + // ---------------------------------------------------- + // VIENNACL + // + template< typename ScalarType> + viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_min > + min(viennacl::vector_base<ScalarType> const & v1) + { + //std::cout << "viennacl .. " << std::endl; + return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_min >(v1, v1); + } + + template< typename ScalarType> + viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_min > + min(viennacl::vector<ScalarType> const & v1) + { + //std::cout << "viennacl .. " << std::endl; + return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_min >(v1, v1); + } + + // with vector expression: + template<typename LHS, typename RHS, typename OP> + viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>, + const viennacl::vector_expression<const LHS, const RHS, OP>, + viennacl::op_min> + min(viennacl::vector_expression<const LHS, const RHS, OP> const & vector) + { + return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>, + const viennacl::vector_expression<const LHS, const RHS, OP>, + viennacl::op_min >(vector, vector); + } + + + + } // end namespace linalg +} // end namespace viennacl +#endif + + + + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp new file mode 100644 index 0000000..208573f --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp @@ -0,0 +1,94 @@ +#ifndef VIENNACL_LINALG_MISC_OPERATIONS_HPP_ +#define VIENNACL_LINALG_MISC_OPERATIONS_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/misc_operations.hpp + @brief Implementations of miscellaneous operations +*/ + +#include "viennacl/forwards.h" +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/host_based/misc_operations.hpp" + +#ifdef VIENNACL_WITH_OPENCL + #include "viennacl/linalg/opencl/misc_operations.hpp" +#endif + +#ifdef VIENNACL_WITH_CUDA + #include "viennacl/linalg/cuda/misc_operations.hpp" +#endif + +namespace viennacl +{ + namespace linalg + { + + namespace detail + { + + template<typename ScalarType> + void level_scheduling_substitute(vector<ScalarType> & vec, + viennacl::backend::mem_handle const & row_index_array, + viennacl::backend::mem_handle const & row_buffer, + viennacl::backend::mem_handle const & col_buffer, + viennacl::backend::mem_handle const & element_buffer, + vcl_size_t num_rows + ) + { + assert( viennacl::traits::handle(vec).get_active_handle_id() == row_index_array.get_active_handle_id() && bool("Incompatible memory domains")); + assert( viennacl::traits::handle(vec).get_active_handle_id() == row_buffer.get_active_handle_id() && bool("Incompatible memory domains")); + assert( viennacl::traits::handle(vec).get_active_handle_id() == col_buffer.get_active_handle_id() && bool("Incompatible memory domains")); + assert( viennacl::traits::handle(vec).get_active_handle_id() == element_buffer.get_active_handle_id() && bool("Incompatible memory domains")); + + switch (viennacl::traits::handle(vec).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } + } + + + + + } //namespace detail + + + } //namespace linalg +} //namespace viennacl + + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp new file mode 100644 index 0000000..78254b3 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp @@ -0,0 +1,199 @@ +#ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_ +#define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/mixed_precision_cg.hpp + @brief The conjugate gradient method using mixed precision is implemented here. Experimental. +*/ + +#include <vector> +#include <map> +#include <cmath> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/ilu.hpp" +#include "viennacl/linalg/prod.hpp" +#include "viennacl/linalg/inner_prod.hpp" +#include "viennacl/traits/clear.hpp" +#include "viennacl/traits/size.hpp" +#include "viennacl/meta/result_of.hpp" +#include "viennacl/backend/memory.hpp" + +#include "viennacl/vector_proxy.hpp" + +namespace viennacl +{ + namespace linalg + { + + /** @brief A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve() function + */ + class mixed_precision_cg_tag + { + public: + /** @brief The constructor + * + * @param tol Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||) + * @param max_iterations The maximum number of iterations + * @param inner_tol Inner tolerance for the low-precision iterations + */ + mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {} + + /** @brief Returns the relative tolerance */ + double tolerance() const { return tol_; } + /** @brief Returns the relative tolerance */ + float inner_tolerance() const { return inner_tol_; } + /** @brief Returns the maximum number of iterations */ + unsigned int max_iterations() const { return iterations_; } + + /** @brief Return the number of solver iterations: */ + unsigned int iters() const { return iters_taken_; } + void iters(unsigned int i) const { iters_taken_ = i; } + + /** @brief Returns the estimated relative error at the end of the solver run */ + double error() const { return last_error_; } + /** @brief Sets the estimated relative error at the end of the solver run */ + void error(double e) const { last_error_ = e; } + + + private: + double tol_; + unsigned int iterations_; + float inner_tol_; + + //return values from solver + mutable unsigned int iters_taken_; + mutable double last_error_; + }; + + + /** @brief Implementation of the conjugate gradient solver without preconditioner + * + * Following the algorithm in the book by Y. Saad "Iterative Methods for sparse linear systems" + * + * @param matrix The system matrix + * @param rhs The load vector + * @param tag Solver configuration tag + * @return The result vector + */ + template<typename MatrixType, typename VectorType> + VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag) + { + //typedef typename VectorType::value_type ScalarType; + typedef typename viennacl::result_of::cpu_value_type<VectorType>::type CPU_ScalarType; + + //std::cout << "Starting CG" << std::endl; + vcl_size_t problem_size = viennacl::traits::size(rhs); + VectorType result(rhs); + viennacl::traits::clear(result); + + VectorType residual = rhs; + + CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs); + CPU_ScalarType new_ip_rr = 0; + CPU_ScalarType norm_rhs_squared = ip_rr; + + if (norm_rhs_squared <= 0) //solution is zero if RHS norm is zero + return result; + + viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs)); + viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs)); + viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs)); + viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs)); + float inner_ip_rr = static_cast<float>(ip_rr); + float new_inner_ip_rr = 0; + float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr); + float alpha; + float beta; + + // transfer rhs to single precision: + p_low_precision = rhs; + residual_low_precision = p_low_precision; + + // transfer matrix to single precision: + viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs)); + viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, matrix_low_precision.handle1().raw_size() ); + viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, matrix_low_precision.handle2().raw_size() ); + + viennacl::vector_base<CPU_ScalarType> matrix_elements_high_precision(const_cast<viennacl::backend::mem_handle &>(matrix.handle()), matrix.nnz(), 0, 1); + viennacl::vector_base<float> matrix_elements_low_precision(matrix_low_precision.handle(), matrix.nnz(), 0, 1); + matrix_elements_low_precision = matrix_elements_high_precision; + matrix_low_precision.generate_row_block_information(); + + for (unsigned int i = 0; i < tag.max_iterations(); ++i) + { + tag.iters(i+1); + + // lower precision 'inner iteration' + tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision); + + alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision); + result_low_precision += alpha * p_low_precision; + residual_low_precision -= alpha * tmp_low_precision; + + new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision); + + beta = new_inner_ip_rr / inner_ip_rr; + inner_ip_rr = new_inner_ip_rr; + + p_low_precision = residual_low_precision + beta * p_low_precision; + + // + // If enough progress has been achieved, update current residual with high precision evaluation + // This is effectively a restart of the CG method + // + if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1) + { + residual = result_low_precision; // reusing residual vector as temporary buffer for conversion. Overwritten below anyway + result += residual; + + // residual = b - Ax (without introducing a temporary) + residual = viennacl::linalg::prod(matrix, result); + residual = rhs - residual; + + new_ip_rr = viennacl::linalg::inner_prod(residual, residual); + if (new_ip_rr / norm_rhs_squared < tag.tolerance() * tag.tolerance())//squared norms involved here + break; + + p_low_precision = residual; + + result_low_precision.clear(); + residual_low_precision = p_low_precision; + initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr); + inner_ip_rr = static_cast<float>(new_ip_rr); + } + } + + //store last error estimate: + tag.error(std::sqrt(new_ip_rr / norm_rhs_squared)); + + return result; + } + + template<typename MatrixType, typename VectorType> + VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond) + { + return solve(matrix, rhs, tag); + } + + + } +} + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp new file mode 100644 index 0000000..c962c8e --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp @@ -0,0 +1,91 @@ +#ifndef VIENNACL_LINALG_NMF_HPP +#define VIENNACL_LINALG_NMF_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory + ============================================================================= */ + +/** @file viennacl/linalg/nmf.hpp + @brief Provides a nonnegative matrix factorization implementation. Experimental. + + + */ + +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/prod.hpp" +#include "viennacl/linalg/norm_2.hpp" +#include "viennacl/linalg/norm_frobenius.hpp" + +#include "viennacl/linalg/host_based/nmf_operations.hpp" + +#ifdef VIENNACL_WITH_OPENCL +#include "viennacl/linalg/opencl/kernels/nmf.hpp" +#include "viennacl/linalg/opencl/nmf_operations.hpp" +#endif + +#ifdef VIENNACL_WITH_CUDA +#include "viennacl/linalg/cuda/nmf_operations.hpp" +#endif + +namespace viennacl +{ + namespace linalg + { + + /** @brief The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized. + * + * @param V Input matrix + * @param W First factor + * @param H Second factor + * @param conf A configuration object holding tolerances and the like + */ + template<typename ScalarType> + void nmf(viennacl::matrix_base<ScalarType> const & V, viennacl::matrix_base<ScalarType> & W, + viennacl::matrix_base<ScalarType> & H, viennacl::linalg::nmf_config const & conf) + { + assert(V.size1() == W.size1() && V.size2() == H.size2() && bool("Dimensions of W and H don't allow for V = W * H")); + assert(W.size2() == H.size1() && bool("Dimensions of W and H don't match, prod(W, H) impossible")); + + switch (viennacl::traits::handle(V).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::nmf(V, W, H, conf); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::nmf(V,W,H,conf); + break; +#endif + +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::nmf(V,W,H,conf); + break; +#endif + + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + + } + + } + } +} + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp new file mode 100644 index 0000000..e16238b --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp @@ -0,0 +1,104 @@ +#ifndef VIENNACL_LINALG_NORM_1_HPP_ +#define VIENNACL_LINALG_NORM_1_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file norm_1.hpp + @brief Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations. +*/ + +#include <cmath> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/meta/enable_if.hpp" +#include "viennacl/meta/tag_of.hpp" + +namespace viennacl +{ + // + // generic norm_1 function + // uses tag dispatch to identify which algorithm + // should be called + // + namespace linalg + { + + #ifdef VIENNACL_WITH_UBLAS + // ---------------------------------------------------- + // UBLAS + // + template< typename VectorT > + typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value, + typename VectorT::value_type + >::type + norm_1(VectorT const& vector) + { + // std::cout << "ublas .. " << std::endl; + return boost::numeric::ublas::norm_1(vector); + } + #endif + + + // ---------------------------------------------------- + // STL + // + template< typename T, typename A > + T norm_1(std::vector<T, A> const & v1) + { + //std::cout << "stl .. " << std::endl; + T result = 0; + for (typename std::vector<T, A>::size_type i=0; i<v1.size(); ++i) + result += std::fabs(v1[i]); + + return result; + } + + // ---------------------------------------------------- + // VIENNACL + // + template< typename ScalarType> + viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_norm_1 > + norm_1(viennacl::vector_base<ScalarType> const & vector) + { + return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_norm_1 >(vector, vector); + } + + // with vector expression: + template<typename LHS, typename RHS, typename OP> + viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>, + const viennacl::vector_expression<const LHS, const RHS, OP>, + viennacl::op_norm_1> + norm_1(viennacl::vector_expression<const LHS, const RHS, OP> const & vector) + { + return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>, + const viennacl::vector_expression<const LHS, const RHS, OP>, + viennacl::op_norm_1 >(vector, vector); + } + + } // end namespace linalg +} // end namespace viennacl +#endif + + + + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp new file mode 100644 index 0000000..babb285 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp @@ -0,0 +1,140 @@ +#ifndef VIENNACL_LINALG_NORM_2_HPP_ +#define VIENNACL_LINALG_NORM_2_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file norm_2.hpp + @brief Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations. +*/ + +#include <cmath> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/meta/enable_if.hpp" +#include "viennacl/meta/tag_of.hpp" + +namespace viennacl +{ + // + // generic norm_2 function + // uses tag dispatch to identify which algorithm + // should be called + // + namespace linalg + { + #ifdef VIENNACL_WITH_MTL4 + // ---------------------------------------------------- + // MTL4 + // + template< typename VectorT > + typename viennacl::enable_if< viennacl::is_mtl4< typename viennacl::traits::tag_of< VectorT >::type >::value, + typename VectorT::value_type>::type + norm_2(VectorT const & v) + { + return mtl::two_norm(v); + } + #endif + + #ifdef VIENNACL_WITH_ARMADILLO + // ---------------------------------------------------- + // Armadillo + // + template<typename NumericT> + NumericT norm_2(arma::Col<NumericT> const& v) + { + return norm(v); + } + #endif + + #ifdef VIENNACL_WITH_EIGEN + // ---------------------------------------------------- + // EIGEN + // + template< typename VectorT > + typename viennacl::enable_if< viennacl::is_eigen< typename viennacl::traits::tag_of< VectorT >::type >::value, + typename VectorT::RealScalar>::type + norm_2(VectorT const & v) + { + return v.norm(); + } + #endif + + + #ifdef VIENNACL_WITH_UBLAS + // ---------------------------------------------------- + // UBLAS + // + template< typename VectorT > + typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value, + typename VectorT::value_type>::type + norm_2(VectorT const & v) + { + return boost::numeric::ublas::norm_2(v); + } + #endif + + + // ---------------------------------------------------- + // STL + // + template< typename T, typename A > + T norm_2(std::vector<T, A> const & v1) + { + T result = 0; + for (typename std::vector<T, A>::size_type i=0; i<v1.size(); ++i) + result += v1[i] * v1[i]; + + return std::sqrt(result); + } + + // ---------------------------------------------------- + // VIENNACL + // + template< typename ScalarType> + viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_norm_2 > + norm_2(viennacl::vector_base<ScalarType> const & v) + { + //std::cout << "viennacl .. " << std::endl; + return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, + const viennacl::vector_base<ScalarType>, + viennacl::op_norm_2 >(v, v); + } + + // with vector expression: + template<typename LHS, typename RHS, typename OP> + viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>, + const viennacl::vector_expression<const LHS, const RHS, OP>, + viennacl::op_norm_2> + norm_2(viennacl::vector_expression<const LHS, const RHS, OP> const & vector) + { + return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>, + const viennacl::vector_expression<const LHS, const RHS, OP>, + viennacl::op_norm_2>(vector, vector); + } + + + } // end namespace linalg +} // end namespace viennacl +#endif + + + + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp new file mode 100644 index 0000000..6873a53 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp @@ -0,0 +1,73 @@ +#ifndef VIENNACL_LINALG_NORM_FROBENIUS_HPP_ +#define VIENNACL_LINALG_NORM_FROBENIUS_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/norm_frobenius.hpp + @brief Generic interface for the Frobenius norm. +*/ + +#include <cmath> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/meta/enable_if.hpp" +#include "viennacl/meta/tag_of.hpp" + +namespace viennacl +{ + // + // generic norm_frobenius function + // uses tag dispatch to identify which algorithm + // should be called + // + namespace linalg + { + + #ifdef VIENNACL_WITH_UBLAS + // ---------------------------------------------------- + // UBLAS + // + template< typename VectorT > + typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value, + typename VectorT::value_type + >::type + norm_frobenius(VectorT const& v1) + { + return boost::numeric::ublas::norm_frobenius(v1); + } + #endif + + + // ---------------------------------------------------- + // VIENNACL + // + template<typename NumericT> + scalar_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_norm_frobenius> + norm_frobenius(const matrix_base<NumericT> & A) + { + return scalar_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_norm_frobenius>(A, A); + } + + } // end namespace linalg +} // end namespace viennacl +#endif + + + + +
