http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas2_opencl.cu ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas2_opencl.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas2_opencl.cu new file mode 100644 index 0000000..20c4994 --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas2_opencl.cu @@ -0,0 +1,219 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" + +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + + +// xGEMV + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemv(ViennaCLBackend backend, + ViennaCLOrder order, ViennaCLTranspose transA, + ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem x, ViennaCLInt offx, ViennaCLInt incx, + float beta, + cl_mem y, ViennaCLInt offy, ViennaCLInt incy) +{ + typedef viennacl::vector_base<float>::size_type size_type; + typedef 
viennacl::vector_base<float>::size_type difference_type; + + viennacl::vector_base<float> v1(x, size_type(n), size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::vector_base<float> v2(y, size_type(m), size_type(offy), difference_type(incy), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::matrix_base<float> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), + size_type(m), size_type(offA_row), difference_type(incA_row), size_type(m), + size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor); + v2 *= beta; + if (transA == ViennaCLTrans) + v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); + else + v2 += alpha * viennacl::linalg::prod(mat, v1); + + return ViennaCLSuccess; +} + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemv(ViennaCLBackend backend, + ViennaCLOrder order, ViennaCLTranspose transA, + ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem x, ViennaCLInt offx, ViennaCLInt incx, + double beta, + cl_mem y, ViennaCLInt offy, ViennaCLInt incy) +{ + typedef viennacl::vector_base<double>::size_type size_type; + typedef viennacl::vector_base<double>::size_type difference_type; + + viennacl::vector_base<double> v1(x, size_type(n), size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::vector_base<double> v2(y, size_type(m), size_type(offy), difference_type(incy), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::matrix_base<double> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), + size_type(m), size_type(offA_row), difference_type(incA_row), size_type(m), + size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == 
ViennaCLRowMajor); + v2 *= beta; + if (transA == ViennaCLTrans) + v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); + else + v2 += alpha * viennacl::linalg::prod(mat, v1); + + return ViennaCLSuccess; +} + + + +// xTRSV + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLStrsv(ViennaCLBackend backend, + ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, + ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem x, ViennaCLInt offx, ViennaCLInt incx) +{ + typedef viennacl::vector_base<float>::size_type size_type; + typedef viennacl::vector_base<float>::size_type difference_type; + + viennacl::vector_base<float> v(x, size_type(n), size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::matrix_base<float> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), + size_type(n), size_type(offA_row), difference_type(incA_row), size_type(n), + size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor); + if (transA == ViennaCLTrans) + { + if (uplo == ViennaCLUpper) + if (diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_upper_tag()); + else + viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); + else + if (diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_lower_tag()); + else + viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); + } + else + { + if (uplo == ViennaCLUpper) + if (diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_upper_tag()); + else + viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); + else + if (diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat, v, 
viennacl::linalg::unit_lower_tag()); + else + viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); + } + + return ViennaCLSuccess; +} + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDtrsv(ViennaCLBackend backend, + ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, + ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem x, ViennaCLInt offx, ViennaCLInt incx) +{ + typedef viennacl::vector_base<double>::size_type size_type; + typedef viennacl::vector_base<double>::size_type difference_type; + + viennacl::vector_base<double> v(x, size_type(n), size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::matrix_base<double> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), + size_type(n), size_type(offA_row), difference_type(incA_row), size_type(n), + size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor); + if (transA == ViennaCLTrans) + { + if (uplo == ViennaCLUpper) + if (diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_upper_tag()); + else + viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); + else + if (diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_lower_tag()); + else + viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); + } + else + { + if (uplo == ViennaCLUpper) + if (diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_upper_tag()); + else + viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); + else + if (diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_lower_tag()); + else + viennacl::linalg::inplace_solve(mat, v, 
viennacl::linalg::lower_tag()); + } + + return ViennaCLSuccess; +} + + + +// xGER + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSger(ViennaCLBackend backend, + ViennaCLOrder order, + ViennaCLInt m, ViennaCLInt n, + float alpha, + cl_mem x, ViennaCLInt offx, ViennaCLInt incx, + cl_mem y, ViennaCLInt offy, ViennaCLInt incy, + cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda) +{ + typedef viennacl::vector_base<float>::size_type size_type; + typedef viennacl::vector_base<float>::size_type difference_type; + + viennacl::vector_base<float> v1(x, size_type(n), size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::vector_base<float> v2(y, size_type(m), size_type(offy), difference_type(incy), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::matrix_base<float> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), + size_type(m), size_type(offA_row), difference_type(incA_row), size_type(m), + size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor); + + mat += alpha * viennacl::linalg::outer_prod(v1, v2); + + return ViennaCLSuccess; +} + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDger(ViennaCLBackend backend, + ViennaCLOrder order, + ViennaCLInt m, ViennaCLInt n, + double alpha, + cl_mem x, ViennaCLInt offx, ViennaCLInt incx, + cl_mem y, ViennaCLInt offy, ViennaCLInt incy, + cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda) +{ + typedef viennacl::vector_base<double>::size_type size_type; + typedef viennacl::vector_base<double>::size_type difference_type; + + viennacl::vector_base<double> v1(x, size_type(n), size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::vector_base<double> v2(y, size_type(m), 
size_type(offy), difference_type(incy), viennacl::ocl::get_context(backend->opencl_backend.context_id)); + viennacl::matrix_base<double> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), + size_type(m), size_type(offA_row), difference_type(incA_row), size_type(m), + size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor); + + mat += alpha * viennacl::linalg::outer_prod(v1, v2); + + return ViennaCLSuccess; +} +
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cpp b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cpp new file mode 100644 index 0000000..bb6e03e --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cpp @@ -0,0 +1,272 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +#include "init_matrix.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + +// GEMV + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C) +{ + viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle B_handle; + viennacl::backend::mem_handle C_handle; + + if (init_matrix(A_handle, A) != ViennaCLSuccess) + return ViennaCLGenericFailure; + + if (init_matrix(B_handle, B) != ViennaCLSuccess) + return ViennaCLGenericFailure; + + if (init_matrix(C_handle, C) != ViennaCLSuccess) + return 
ViennaCLGenericFailure; + + switch (A->precision) + { + case ViennaCLFloat: + { + typedef viennacl::matrix_base<float>::size_type size_type; + typedef viennacl::matrix_base<float>::size_type difference_type; + + viennacl::matrix_base<float> mat_A(A_handle, + size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1), + size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor); + viennacl::matrix_base<float> mat_B(B_handle, + size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1), + size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor); + viennacl::matrix_base<float> mat_C(C_handle, + size_type(C->size1), size_type(C->start1), difference_type(C->stride1), size_type(C->internal_size1), + size_type(C->size2), size_type(C->start2), difference_type(C->stride2), size_type(C->internal_size2), C->order == ViennaCLRowMajor); + + if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) + viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); + else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) + viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) + viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) + viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; + } + + case ViennaCLDouble: + { + typedef viennacl::matrix_base<double>::size_type size_type; + typedef viennacl::matrix_base<double>::size_type 
difference_type; + + viennacl::matrix_base<double> mat_A(A_handle, + size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1), + size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor); + viennacl::matrix_base<double> mat_B(B_handle, + size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1), + size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor); + viennacl::matrix_base<double> mat_C(C_handle, + size_type(C->size1), size_type(C->start1), difference_type(C->stride1), size_type(C->internal_size1), + size_type(C->size2), size_type(C->start2), difference_type(C->stride2), size_type(C->internal_size2), C->order == ViennaCLRowMajor); + + if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) + viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); + else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) + viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) + viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) + viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; + } + + default: + return ViennaCLGenericFailure; + } +} + + +// xTRSV + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsm(ViennaCLMatrix A, ViennaCLUplo uplo, ViennaCLDiag diag, ViennaCLMatrix B) +{ + viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle B_handle; + + if (init_matrix(A_handle, A) != 
ViennaCLSuccess) + return ViennaCLGenericFailure; + + if (init_matrix(B_handle, B) != ViennaCLSuccess) + return ViennaCLGenericFailure; + + switch (A->precision) + { + case ViennaCLFloat: + { + typedef viennacl::matrix_base<float>::size_type size_type; + typedef viennacl::matrix_base<float>::size_type difference_type; + + viennacl::matrix_base<float> mat_A(A_handle, + size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1), + size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor); + viennacl::matrix_base<float> mat_B(B_handle, + size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1), + size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor); + + if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + 
viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + + return ViennaCLSuccess; + } + case 
ViennaCLDouble: + { + typedef viennacl::matrix_base<double>::size_type size_type; + typedef viennacl::matrix_base<double>::size_type difference_type; + + viennacl::matrix_base<double> mat_A(A_handle, + size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1), + size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor); + viennacl::matrix_base<double> mat_B(B_handle, + size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1), + size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor); + + if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + 
viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + + return ViennaCLSuccess; + } + + default: + return ViennaCLGenericFailure; + } +} + + + 
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cu ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cu new file mode 100644 index 0000000..bb6e03e --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cu @@ -0,0 +1,272 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +#include "init_matrix.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + +// GEMV + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C) +{ + viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle B_handle; + viennacl::backend::mem_handle C_handle; + + if (init_matrix(A_handle, A) != ViennaCLSuccess) + return ViennaCLGenericFailure; + + if (init_matrix(B_handle, B) != ViennaCLSuccess) + return ViennaCLGenericFailure; + + if (init_matrix(C_handle, C) != ViennaCLSuccess) + return 
ViennaCLGenericFailure; + + switch (A->precision) + { + case ViennaCLFloat: + { + typedef viennacl::matrix_base<float>::size_type size_type; + typedef viennacl::matrix_base<float>::size_type difference_type; + + viennacl::matrix_base<float> mat_A(A_handle, + size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1), + size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor); + viennacl::matrix_base<float> mat_B(B_handle, + size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1), + size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor); + viennacl::matrix_base<float> mat_C(C_handle, + size_type(C->size1), size_type(C->start1), difference_type(C->stride1), size_type(C->internal_size1), + size_type(C->size2), size_type(C->start2), difference_type(C->stride2), size_type(C->internal_size2), C->order == ViennaCLRowMajor); + + if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) + viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); + else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) + viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) + viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) + viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; + } + + case ViennaCLDouble: + { + typedef viennacl::matrix_base<double>::size_type size_type; + typedef viennacl::matrix_base<double>::size_type 
difference_type; + + viennacl::matrix_base<double> mat_A(A_handle, + size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1), + size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor); + viennacl::matrix_base<double> mat_B(B_handle, + size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1), + size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor); + viennacl::matrix_base<double> mat_C(C_handle, + size_type(C->size1), size_type(C->start1), difference_type(C->stride1), size_type(C->internal_size1), + size_type(C->size2), size_type(C->start2), difference_type(C->stride2), size_type(C->internal_size2), C->order == ViennaCLRowMajor); + + if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) + viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); + else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) + viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) + viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) + viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; + } + + default: + return ViennaCLGenericFailure; + } +} + + +// xTRSV + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsm(ViennaCLMatrix A, ViennaCLUplo uplo, ViennaCLDiag diag, ViennaCLMatrix B) +{ + viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle B_handle; + + if (init_matrix(A_handle, A) != 
ViennaCLSuccess) + return ViennaCLGenericFailure; + + if (init_matrix(B_handle, B) != ViennaCLSuccess) + return ViennaCLGenericFailure; + + switch (A->precision) + { + case ViennaCLFloat: + { + typedef viennacl::matrix_base<float>::size_type size_type; + typedef viennacl::matrix_base<float>::size_type difference_type; + + viennacl::matrix_base<float> mat_A(A_handle, + size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1), + size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor); + viennacl::matrix_base<float> mat_B(B_handle, + size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1), + size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor); + + if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + 
viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + + return ViennaCLSuccess; + } + case 
ViennaCLDouble: + { + typedef viennacl::matrix_base<double>::size_type size_type; + typedef viennacl::matrix_base<double>::size_type difference_type; + + viennacl::matrix_base<double> mat_A(A_handle, + size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1), + size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor); + viennacl::matrix_base<double> mat_B(B_handle, + size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1), + size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor); + + if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + 
viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) + { + if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::upper_tag()); + else if (uplo == ViennaCLUpper && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_upper_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::lower_tag()); + else if (uplo == ViennaCLLower && diag == ViennaCLUnit) + viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_lower_tag()); + else + return ViennaCLGenericFailure; + } + + return ViennaCLSuccess; + } + + default: + return ViennaCLGenericFailure; + } +} + + + 
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.hpp new file mode 100644 index 0000000..cfcc034 --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.hpp @@ -0,0 +1,60 @@ +#ifndef VIENNACL_SRC_BLAS3_HPP +#define VIENNACL_SRC_BLAS3_HPP + +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" + +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + +namespace detail +{ + template <typename ScalarType, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC> + void gemm_dispatch(ScalarType alpha, + MatrixTypeA const & A, ViennaCLTranspose transA, + MatrixTypeB const & B, ViennaCLTranspose transB, + ScalarType beta, + MatrixTypeC & C) + { + + if (transA == ViennaCLTrans && transB == ViennaCLTrans) + viennacl::linalg::prod_impl(viennacl::trans(A), viennacl::trans(B), C, alpha, beta); + else if (transA == 
ViennaCLTrans && transB == ViennaCLNoTrans) + viennacl::linalg::prod_impl(viennacl::trans(A), B, C, alpha, beta); + else if (transA == ViennaCLNoTrans && transB == ViennaCLTrans) + viennacl::linalg::prod_impl(A, viennacl::trans(B), C, alpha, beta); + else if (transA == ViennaCLNoTrans && transB == ViennaCLNoTrans) + viennacl::linalg::prod_impl(A, B, C, alpha, beta); + //else + // return ViennaCLGenericFailure; + } +} + + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_cuda.cu ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_cuda.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_cuda.cu new file mode 100644 index 0000000..318593b --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_cuda.cu @@ -0,0 +1,133 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +#include "blas3.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + + +#ifdef VIENNACL_WITH_CUDA + + + +// +// xGEMV +// + +namespace detail +{ + template <typename NumericT> + ViennaCLStatus ViennaCLCUDAgemm_impl(ViennaCLBackend /*backend*/, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + NumericT alpha, + NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + NumericT *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + NumericT beta, + NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) + { + ViennaCLInt A_size1 = (transA == ViennaCLTrans) ? k : m; + ViennaCLInt A_size2 = (transA == ViennaCLTrans) ? m : k; + + ViennaCLInt B_size1 = (transB == ViennaCLTrans) ? n : k; + ViennaCLInt B_size2 = (transB == ViennaCLTrans) ? k : n; + + bool A_row_major = (orderA == ViennaCLRowMajor); + bool B_row_major = (orderB == ViennaCLRowMajor); + bool C_row_major = (orderC == ViennaCLRowMajor); + + viennacl::matrix_base<NumericT> matA(A, viennacl::CUDA_MEMORY, + A_size1, offA_row, incA_row, A_row_major ? 
m : lda, + A_size2, offA_col, incA_col, A_row_major ? lda : k, A_row_major); + + viennacl::matrix_base<NumericT> matB(B, viennacl::CUDA_MEMORY, + B_size1, offB_row, incB_row, B_row_major ? k : ldb, + B_size2, offB_col, incB_col, B_row_major ? ldb : n, B_row_major); + + viennacl::matrix_base<NumericT> matC(C, viennacl::CUDA_MEMORY, + m, offC_row, incC_row, C_row_major ? m : ldc, + n, offC_col, incC_col, C_row_major ? ldc : n, C_row_major); + + detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); + + return ViennaCLSuccess; + } + +} + + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + float alpha, + float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + float beta, + float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLCUDAgemm_impl<float>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, incC_col, ldc); +} + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + double alpha, + double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + 
double beta, + double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLCUDAgemm_impl<double>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, incC_col, ldc); +} + + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cpp b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cpp new file mode 100644 index 0000000..16ef310 --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cpp @@ -0,0 +1,131 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +#include "blas3.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + + +// +// xGEMV +// + +namespace detail +{ + template <typename NumericT> + ViennaCLStatus ViennaCLHostgemm_impl(ViennaCLBackend /*backend*/, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + NumericT alpha, + NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + NumericT *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + NumericT beta, + NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) + { + typedef typename viennacl::matrix_base<NumericT>::size_type size_type; + typedef typename viennacl::matrix_base<NumericT>::size_type difference_type; + + size_type A_size1 = static_cast<size_type>((transA == ViennaCLTrans) ? k : m); + size_type A_size2 = static_cast<size_type>((transA == ViennaCLTrans) ? m : k); + + size_type B_size1 = static_cast<size_type>((transB == ViennaCLTrans) ? n : k); + size_type B_size2 = static_cast<size_type>((transB == ViennaCLTrans) ? 
k : n); + + bool A_row_major = (orderA == ViennaCLRowMajor); + bool B_row_major = (orderB == ViennaCLRowMajor); + bool C_row_major = (orderC == ViennaCLRowMajor); + + viennacl::matrix_base<NumericT> matA(A, viennacl::MAIN_MEMORY, + A_size1, size_type(offA_row), difference_type(incA_row), size_type(A_row_major ? m : lda), + A_size2, size_type(offA_col), difference_type(incA_col), size_type(A_row_major ? lda : k), A_row_major); + + viennacl::matrix_base<NumericT> matB(B, viennacl::MAIN_MEMORY, + B_size1, size_type(offB_row), difference_type(incB_row), size_type(B_row_major ? k : ldb), + B_size2, size_type(offB_col), difference_type(incB_col), size_type(B_row_major ? ldb : n), B_row_major); + + viennacl::matrix_base<NumericT> matC(C, viennacl::MAIN_MEMORY, + size_type(m), size_type(offC_row), difference_type(incC_row), size_type(C_row_major ? m : ldc), + size_type(n), size_type(offC_col), difference_type(incC_col), size_type(C_row_major ? ldc : n), C_row_major); + + detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); + + return ViennaCLSuccess; + } + +} + + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + float alpha, + float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + float beta, + float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLHostgemm_impl<float>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, 
incC_col, ldc); +} + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + double alpha, + double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + double beta, + double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLHostgemm_impl<double>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, incC_col, ldc); +} + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cu ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cu new file mode 100644 index 0000000..16ef310 --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cu @@ -0,0 +1,131 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +#include "blas3.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + + +// +// xGEMV +// + +namespace detail +{ + template <typename NumericT> + ViennaCLStatus ViennaCLHostgemm_impl(ViennaCLBackend /*backend*/, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + NumericT alpha, + NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + NumericT *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + NumericT beta, + NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) + { + typedef typename viennacl::matrix_base<NumericT>::size_type size_type; + typedef typename viennacl::matrix_base<NumericT>::size_type difference_type; + + size_type A_size1 = static_cast<size_type>((transA == ViennaCLTrans) ? k : m); + size_type A_size2 = static_cast<size_type>((transA == ViennaCLTrans) ? m : k); + + size_type B_size1 = static_cast<size_type>((transB == ViennaCLTrans) ? n : k); + size_type B_size2 = static_cast<size_type>((transB == ViennaCLTrans) ? 
k : n); + + bool A_row_major = (orderA == ViennaCLRowMajor); + bool B_row_major = (orderB == ViennaCLRowMajor); + bool C_row_major = (orderC == ViennaCLRowMajor); + + viennacl::matrix_base<NumericT> matA(A, viennacl::MAIN_MEMORY, + A_size1, size_type(offA_row), difference_type(incA_row), size_type(A_row_major ? m : lda), + A_size2, size_type(offA_col), difference_type(incA_col), size_type(A_row_major ? lda : k), A_row_major); + + viennacl::matrix_base<NumericT> matB(B, viennacl::MAIN_MEMORY, + B_size1, size_type(offB_row), difference_type(incB_row), size_type(B_row_major ? k : ldb), + B_size2, size_type(offB_col), difference_type(incB_col), size_type(B_row_major ? ldb : n), B_row_major); + + viennacl::matrix_base<NumericT> matC(C, viennacl::MAIN_MEMORY, + size_type(m), size_type(offC_row), difference_type(incC_row), size_type(C_row_major ? m : ldc), + size_type(n), size_type(offC_col), difference_type(incC_col), size_type(C_row_major ? ldc : n), C_row_major); + + detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); + + return ViennaCLSuccess; + } + +} + + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + float alpha, + float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + float beta, + float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLHostgemm_impl<float>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, 
incC_col, ldc); +} + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + double alpha, + double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + double beta, + double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLHostgemm_impl<double>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, incC_col, ldc); +} + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cpp b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cpp new file mode 100644 index 0000000..d5e5c1e --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cpp @@ -0,0 +1,136 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +#include "blas3.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + + +#ifdef VIENNACL_WITH_OPENCL + + + +// +// xGEMV +// + +namespace detail +{ + template <typename NumericT> + ViennaCLStatus ViennaCLOpenCLgemm_impl(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + NumericT alpha, + cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + NumericT beta, + cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) + { + typedef typename viennacl::matrix_base<NumericT>::size_type size_type; + typedef typename viennacl::matrix_base<NumericT>::size_type difference_type; + + size_type A_size1 = static_cast<size_type>((transA == ViennaCLTrans) ? k : m); + size_type A_size2 = static_cast<size_type>((transA == ViennaCLTrans) ? m : k); + + size_type B_size1 = static_cast<size_type>((transB == ViennaCLTrans) ? n : k); + size_type B_size2 = static_cast<size_type>((transB == ViennaCLTrans) ? 
k : n); + + bool A_row_major = (orderA == ViennaCLRowMajor); + bool B_row_major = (orderB == ViennaCLRowMajor); + bool C_row_major = (orderC == ViennaCLRowMajor); + + viennacl::matrix_base<NumericT> matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), + A_size1, size_type(offA_row), difference_type(incA_row), size_type(A_row_major ? m : lda), + A_size2, size_type(offA_col), difference_type(incA_col), size_type(A_row_major ? lda : k), A_row_major); + + viennacl::matrix_base<NumericT> matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), + B_size1, size_type(offB_row), difference_type(incB_row), size_type(B_row_major ? k : ldb), + B_size2, size_type(offB_col), difference_type(incB_col), size_type(B_row_major ? ldb : n), B_row_major); + + viennacl::matrix_base<NumericT> matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), + size_type(m), size_type(offC_row), difference_type(incC_row), size_type(C_row_major ? m : ldc), + size_type(n), size_type(offC_col), difference_type(incC_col), size_type(C_row_major ? 
ldc : n), C_row_major); + + detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); + + return ViennaCLSuccess; + } + +} + + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + float alpha, + cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + float beta, + cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLOpenCLgemm_impl<float>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, incC_col, ldc); +} + +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + double alpha, + cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + double beta, + cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLOpenCLgemm_impl<double>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, incC_col, ldc); +} + 
+ +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cu ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cu new file mode 100644 index 0000000..d5e5c1e --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cu @@ -0,0 +1,136 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +// include necessary system headers +#include <iostream> + +#include "viennacl.hpp" +#include "viennacl_private.hpp" + +#include "blas3.hpp" + +//include basic scalar and vector types of ViennaCL +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/prod.hpp" + + +#ifdef VIENNACL_WITH_OPENCL + + + +// +// xGEMM +// + +namespace detail +{ /* ViennaCLOpenCLgemm_impl: type-generic body shared by the exported Sgemm and Dgemm wrappers. It builds viennacl::matrix_base<NumericT> objects for A, B and C from the supplied cl_mem handles and passes them, together with alpha, beta, transA and transB, to detail::gemm_dispatch. Its only return statement yields ViennaCLSuccess. */ + template <typename NumericT> + ViennaCLStatus ViennaCLOpenCLgemm_impl(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + NumericT alpha, + cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem B, ViennaCLInt offB_row, 
ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + NumericT beta, + cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) + { + typedef typename viennacl::matrix_base<NumericT>::size_type size_type; + typedef typename viennacl::matrix_base<NumericT>::size_type difference_type; + /* Note: difference_type is an alias of the very same matrix_base size_type; it is only used to cast the inc arguments. Extents of the stored operands follow: A is k by m when transA == ViennaCLTrans and m by k otherwise; B is n by k when transB == ViennaCLTrans and k by n otherwise. */ + size_type A_size1 = static_cast<size_type>((transA == ViennaCLTrans) ? k : m); + size_type A_size2 = static_cast<size_type>((transA == ViennaCLTrans) ? m : k); + + size_type B_size1 = static_cast<size_type>((transB == ViennaCLTrans) ? n : k); + size_type B_size2 = static_cast<size_type>((transB == ViennaCLTrans) ? k : n); + + bool A_row_major = (orderA == ViennaCLRowMajor); + bool B_row_major = (orderB == ViennaCLRowMajor); + bool C_row_major = (orderC == ViennaCLRowMajor); + /* Each operand is constructed with the context returned by viennacl::ocl::get_context(backend->opencl_backend.context_id). NOTE(review): the fourth argument of every row and column group is chosen from the storage order only (for A: m or lda, then lda or k; for B: k or ldb, then ldb or n) and does not follow transA or transB, whereas A_size1, A_size2, B_size1 and B_size2 do swap under transposition. Confirm against the matrix_base constructor that this is intended. */ + viennacl::matrix_base<NumericT> matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), + A_size1, size_type(offA_row), difference_type(incA_row), size_type(A_row_major ? m : lda), + A_size2, size_type(offA_col), difference_type(incA_col), size_type(A_row_major ? lda : k), A_row_major); + + viennacl::matrix_base<NumericT> matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), + B_size1, size_type(offB_row), difference_type(incB_row), size_type(B_row_major ? k : ldb), + B_size2, size_type(offB_col), difference_type(incB_col), size_type(B_row_major ? ldb : n), B_row_major); + + viennacl::matrix_base<NumericT> matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), + size_type(m), size_type(offC_row), difference_type(incC_row), size_type(C_row_major ? m : ldc), + size_type(n), size_type(offC_col), difference_type(incC_col), size_type(C_row_major ? 
ldc : n), C_row_major); + /* The computation itself is delegated to detail::gemm_dispatch, which is not defined in this chunk (presumably declared in blas3.hpp and computing alpha times op(A) times op(B) plus beta times C; TODO confirm). */ + detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); + + return ViennaCLSuccess; + } + +} + +/* Exported single-precision GEMM entry point. Thin wrapper: every argument is forwarded unchanged and in the same order to detail::ViennaCLOpenCLgemm_impl<float>, and the ViennaCLStatus of that call is returned as-is. */ +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + float alpha, + cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + float beta, + cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLOpenCLgemm_impl<float>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, incC_col, ldc); +} +/* Exported double-precision GEMM entry point. Same forwarding wrapper as the float variant, except that alpha and beta are double and the call goes to detail::ViennaCLOpenCLgemm_impl<double>. */ +VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemm(ViennaCLBackend backend, + ViennaCLOrder orderA, ViennaCLTranspose transA, + ViennaCLOrder orderB, ViennaCLTranspose transB, + ViennaCLOrder orderC, + ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, + double alpha, + cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, + cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, + double beta, + cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) +{ + return detail::ViennaCLOpenCLgemm_impl<double>(backend, + orderA, transA, + orderB, transB, + orderC, + m, n, k, + alpha, + A, offA_row, offA_col, incA_row, incA_col, lda, + B, offB_row, offB_col, incB_row, incB_col, ldb, + beta, + C, offC_row, offC_col, incC_row, incC_col, ldc); +} + 
+ +#endif
