http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/gtest/gtest_main.cc ---------------------------------------------------------------------- diff --git a/include/gtest/gtest_main.cc b/include/gtest/gtest_main.cc deleted file mode 100644 index f302822..0000000 --- a/include/gtest/gtest_main.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2006, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include <stdio.h> - -#include "gtest/gtest.h" - -GTEST_API_ int main(int argc, char **argv) { - printf("Running main() from gtest_main.cc\n"); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/cuda/cuda_reduce.cuh ---------------------------------------------------------------------- diff --git a/include/mshadow/cuda/cuda_reduce.cuh b/include/mshadow/cuda/cuda_reduce.cuh deleted file mode 100644 index b7808a6..0000000 --- a/include/mshadow/cuda/cuda_reduce.cuh +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef MSHADOW_CUDA_REDUCE_CUH -#define MSHADOW_CUDA_REDUCE_CUH -/*! - * \file cuda_reduce.cuh - * \brief helper functions to do reduction - * \author Tianqi Chen - */ -namespace mshadow{ - namespace cuda{ - /* - * \brief reduce over the dimension x - * \tparam Reducer reducer - * \tparam x_bits dimension = 1<<x_bits - */ - template<typename Reducer,int x_bits> - inline __device__ void Reduce1D( volatile real_t buf[1<<x_bits] ); - /* - * \brief reduce over the dimension x - * \tparam Reducer reducer - * \tparam xmax_bits maximum size of buffer - * \param xsize size of x dimension, not sure if aligned - */ - template<typename Reducer, int xmax_bits> - inline __device__ void Reduce1DNotAlign( volatile real_t buf[1<<xmax_bits], int xsize ); - }; -}; - -// ===============================================x=== -// implementations afterwards, -// no need to read if only use the functions -// -------------------------------------------------- -#ifdef __DEVICE_EMULATION__ -#define __MSHADOW_EMUSYNC__ __syncthreads() -#else -#define __MSHADOW_EMUSYNC__ -#endif - -namespace mshadow{ - namespace cuda{ - template<typename Reducer, int x_bits> - inline __device__ void ReduceX( volatile real_t buf[], int tid ){ - if( x_bits >= 10 ){ - if( tid < 512 ) Reducer::Reduce( buf[tid] , buf[tid + 512] ); - __syncthreads(); - } - if( x_bits >= 9 ){ - if( tid < 256 ) Reducer::Reduce( buf[tid] , buf[tid + 256] ); - __syncthreads(); - } - if( x_bits >= 8 ){ - if( tid < 128 ) Reducer::Reduce( buf[tid] , buf[tid + 128] ); - __syncthreads(); - } - if( x_bits >= 7 ){ - if( tid < 64 ) Reducer::Reduce( buf[tid] , 
buf[tid + 64 ] ); - __syncthreads(); - } - if( x_bits >= 6 ){ - if( tid < 32 ) Reducer::Reduce( buf[tid] , buf[tid + 32] ); - __syncthreads(); - } - // in warp optimization - if( x_bits >= 5 ){ - if( tid < 16 ) Reducer::Reduce( buf[tid] , buf[tid + 16] ); - __MSHADOW_EMUSYNC__; - } - if( x_bits >= 4 ){ - if( tid < 8 ) Reducer::Reduce( buf[tid] , buf[tid + 8 ] ); - __MSHADOW_EMUSYNC__; - } - if( x_bits >= 3 ){ - if( tid < 4 ) Reducer::Reduce( buf[tid] , buf[tid + 4 ] ); - __MSHADOW_EMUSYNC__; - } - if( x_bits >= 2 ){ - if( tid < 2 ) Reducer::Reduce( buf[tid] , buf[tid + 2 ] ); - __MSHADOW_EMUSYNC__; - } - if( x_bits >= 1 ){ - if( tid < 1 ) Reducer::Reduce( buf[tid] , buf[tid + 1 ] ); - __MSHADOW_EMUSYNC__; - } - }; - - template<typename Reducer,int x_bits> - inline __device__ void Reduce1D( volatile real_t buf[1<<x_bits] ){ - ReduceX<Reducer,x_bits>( buf, threadIdx.x ); - } - - // reduce with a upper bound - #define __RD_NON_ALIGN(els,x_bits) \ - els \ - if( xmax_bits >= x_bits && x_size >= (1 << x_bits) ){ \ - if( tid < (1 << x_bits) && tid + (1<<x_bits) < x_size ){ \ - Reducer::Reduce( buf[tid] , buf[tid + (1<<x_bits)] ); \ - } \ - __syncthreads(); \ - ReduceX<Reducer, x_bits>( buf, tid ); \ - } \ - - template<typename Reducer, int xmax_bits> - inline __device__ void Reduce1DNotAlign( volatile real_t buf[], int x_size ){ - int tid = threadIdx.x; - __RD_NON_ALIGN(, 8) - __RD_NON_ALIGN(else, 7) - __RD_NON_ALIGN(else, 6) - __RD_NON_ALIGN(else, 5) - __RD_NON_ALIGN(else, 4) - __RD_NON_ALIGN(else, 3) - __RD_NON_ALIGN(else, 2) - __RD_NON_ALIGN(else, 1) - } - }; -}; - -#endif // MSHADOW_CUDA_REDUCE_CUH - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/cuda/tensor_gpu-inl.cuh ---------------------------------------------------------------------- diff --git a/include/mshadow/cuda/tensor_gpu-inl.cuh b/include/mshadow/cuda/tensor_gpu-inl.cuh deleted file mode 100644 index 61e477c..0000000 --- a/include/mshadow/cuda/tensor_gpu-inl.cuh +++ 
/dev/null @@ -1,231 +0,0 @@ -#ifndef MSHADOW_TENSOR_GPU_INL_CUH -#define MSHADOW_TENSOR_GPU_INL_CUH -/*! - * \file tensor_gpu-inl.cuh - * \brief implementation of GPU code using CUDA - * \author Bing Xu, Tianqi Chen - */ -#include "../tensor.h" -#include "cuda_reduce.cuh" - -namespace mshadow{ - namespace cuda{ - #ifndef __CUDA_ARCH__ - #warning "__CUDA_ARCH__ is not defined, I will assume compiling with CUDA verion greater than 2.0" - #endif - /* load unit for memory access */ - #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 200 - const int kMemUnitBits = 5; - const int kMaxThreadsPerBlock = 1024; - #else - const int kMemUnitBits = 4; - const int kMaxThreadsPerBlock = 512; - #endif - /*! \brief number of units that can do synchronized update, half warp size */ - const int kMemUnit = 1 << kMemUnitBits; - /*! \brief mask that could be helpful sometime */ - const int kMemUnitMask = kMemUnit - 1; - /*! \brief suggested thread number(logscale) for mapping kernel */ - const int kBaseThreadBits = 8; - /*! \brief suggested thread number for mapping kernel */ - const int kBaseThreadNum = 1 << kBaseThreadBits; - /*! \brief maximum value of grid */ - const int kMaxGridNum = 65535; - /*! \brief suggested grid number for mapping kernel */ - const int kBaseGridNum = 1024; - - /*! 
\brief get align stride for given size in x dimension */ - inline index_t GetAlignStride( index_t xsize, index_t xstride ){ - if( (xstride & (kMemUnit-1)) == 0 ){ - return ( (xsize + kMemUnit - 1) >> kMemUnitBits) << kMemUnitBits; - }else{ - // if originally space is not aligned, no necessary to to alligned thread allocation - return xsize; - } - } - inline void CheckLaunchParam( dim3 dimGrid, dim3 dimBlock, const char *estr = "" ){ - if( dimBlock.x*dimBlock.y*dimBlock.z > (unsigned)kMaxThreadsPerBlock || - dimGrid.x > 65535 || dimGrid.y > 65535 ){ - fprintf( stderr, "%s[%u,%u,%u]:", estr, dimBlock.x, dimBlock.y, dimBlock.z ); - utils::Error( "too large launch parameter\n"); - } - } - }; - - namespace cuda { - template<typename Saver, typename Plan, int block_dim_bits> - __device__ void MapPlanProc( Tensor<gpu,2> dst, const index_t xstride, const Plan exp, int block_idx ){ - const index_t tid = (block_idx << block_dim_bits) + threadIdx.x; - const int y = tid / xstride; - const int x = tid % xstride; - if (y < dst.shape[1] && x < dst.shape[0]) { - Saver::Save(dst[y][x], exp.Eval(y,x)); - } - } - template<typename Saver, typename Plan, int block_dim_bits> - __global__ void MapPlanKernel( Tensor<gpu,2> dst, const index_t xstride, const Plan exp ){ - MapPlanProc<Saver, Plan,block_dim_bits>( dst, xstride, exp, blockIdx.x ); - } - template<typename Saver, typename Plan, int block_dim_bits, int grid_size> - __global__ void MapPlanLargeKernel( Tensor<gpu,2> dst, const index_t xstride, const Plan exp, int repeat ){ - for( int i = 0; i < repeat; ++i ){ - MapPlanProc<Saver, Plan,block_dim_bits>( dst, xstride, exp, blockIdx.x + i*grid_size ); - } - } - - template<typename Saver, typename E> - inline void MapPlan( Tensor<gpu,2> dst, const expr::Plan<E> &plan ){ - const index_t xstride = GetAlignStride( dst.shape[0], dst.shape.stride_ ); - const int num_block = ( dst.shape[1]*xstride + kBaseThreadNum-1) / kBaseThreadNum; - dim3 dimBlock(kBaseThreadNum, 1, 1); - - if (num_block < 
kMaxGridNum) { - dim3 dimGrid(num_block, 1, 1); - MapPlanKernel<Saver, expr::Plan<E>, kBaseThreadBits> \ - <<<dimGrid,dimBlock>>>(dst, xstride, plan); - } else { - int repeat = (num_block + kBaseGridNum-1) / kBaseGridNum; - dim3 dimGrid( kBaseGridNum, 1 , 1 ); - MapPlanLargeKernel<Saver,expr::Plan<E>, kBaseThreadBits, kBaseGridNum> \ - <<<dimGrid,dimBlock>>>(dst, xstride, plan, repeat ); - } - } - }; // namespace cuda - - namespace cuda{ - template<typename Saver,typename Reducer, int warp_bits, typename Plan> - __global__ void MapRedKeepLowestKernel( Tensor<gpu,1> dst, Plan plan, real_t scale, Shape<2> eshape ){ - const unsigned warp_size = 1 << warp_bits; - const unsigned x = (blockIdx.x<<warp_bits) + threadIdx.x; - // to avoid bank conflict - __shared__ real_t s_res[ warp_size ][ warp_size + 1 ]; - - // note: reverse store [y][x], so that we can reduce over threadIdx.x, use warp optimization - if( threadIdx.y < eshape[1] && x < eshape[0] ){ - s_res[ threadIdx.x ][ threadIdx.y ] = plan.Eval( threadIdx.y, x ); - } - for( unsigned y = warp_size; y < eshape[1]; y += warp_size ){ - if( threadIdx.y + y < eshape[1] && x < eshape[0] ){ - Reducer::Reduce( s_res[ threadIdx.x ][ threadIdx.y ], plan.Eval( threadIdx.y + y, x ) ); - } - } - __syncthreads(); - if( eshape[1] >= warp_size ){ - Reduce1D<Reducer,warp_bits>( s_res[ threadIdx.y ] ); - }else{ - Reduce1DNotAlign<Reducer,warp_bits>( s_res[ threadIdx.y ], eshape[1] ); - } - __syncthreads(); - - if( threadIdx.y == 0 && x < eshape[0] ){ - Saver::Save( dst[x], s_res[ threadIdx.x ][ 0 ] * scale ); - } - } - - template<typename Saver, typename Reducer, typename E> - inline void MapReduceKeepLowest( Tensor<gpu,1> dst, const expr::Plan<E> &plan, real_t scale, Shape<2> eshape ){ - dim3 dimBlock( kMemUnit, kMemUnit ); - dim3 dimGrid ( (eshape[0]+kMemUnit-1) >> kMemUnitBits ); - CheckLaunchParam( dimGrid, dimBlock, "MapRedKeepLowestKernel" ); - MapRedKeepLowestKernel<Saver,Reducer,kMemUnitBits><<<dimGrid,dimBlock>>>( dst, plan, 
scale, eshape ); - } - }; // namespace cuda - - namespace cuda{ - template<typename Saver,typename Reducer, int block_dim_bits, typename Plan> - __global__ void MapReduceKeepDim2Kernel( Tensor<gpu,1> dst, Plan plan, real_t scale, Shape<4> pshape ){ - const int block_size = 1 << block_dim_bits; - __shared__ real_t s_rec[ block_size ]; - const int c = blockIdx.x; - const index_t tot = pshape[0]*pshape[1]*pshape[3]; - - real_t res = Reducer::kInitV; - for( index_t i_offset = 0; i_offset < tot; i_offset += block_size ){ - index_t i = i_offset + threadIdx.x; - if( i< tot ){ - const index_t x = i % pshape[0]; - i /= pshape[0]; - const index_t y = i % pshape[1]; - const index_t n = i / pshape[1]; - Reducer::Reduce( res, plan.Eval( (n*pshape[2] + c) * pshape[1] + y, x ) ); - } - } - s_rec[ threadIdx.x ] = res; - __syncthreads(); - Reduce1D<Reducer,block_dim_bits>( s_rec ); - if( threadIdx.x == 0 ){ - Saver::Save( dst[c], s_rec[0]*scale ); - } - } - - template<typename Saver, typename Reducer, typename Plan> - inline void MapReduceKeepDim2( Tensor<gpu,1> dst, const Plan &plan, real_t scale, Shape<4> pshape ){ - dim3 dimBlock( kBaseThreadNum ); - dim3 dimGrid ( dst.shape[0] ); - CheckLaunchParam( dimGrid, dimBlock, "MapReduceKeepDim2" ); - MapReduceKeepDim2Kernel<Saver,Reducer,kBaseThreadBits> - <<<dimGrid,dimBlock>>>( dst, plan, scale, pshape ); - } - }; - - namespace cuda{ - template<int x_bits> - __global__ void SoftmaxKernel( Tensor<gpu,2> dst, Tensor<gpu,2> src ){ - const unsigned x_size = 1 << x_bits; - const int y = blockIdx.x; - __shared__ real_t s_rec[ x_size ]; - - // step 1: get max - if( threadIdx.x < dst.shape[ 0 ] ){ - s_rec[ threadIdx.x ] = src[ y ][ threadIdx.x ] ; - } - for( unsigned x = x_size; x < dst.shape[0]; x += x_size ){ - if( x + threadIdx.x < dst.shape[0] ){ - real_t a = src[ y ][ x + threadIdx.x ]; - s_rec[ threadIdx.x ] = max( a, s_rec[ threadIdx.x] ); - } - } - __syncthreads(); - if( threadIdx.x >= dst.shape[0] ){ - s_rec[ threadIdx.x ] = 
s_rec[0]; - } - __syncthreads(); - Reduce1D<red::maximum,x_bits>( s_rec ); - __syncthreads(); - real_t smax = s_rec[0]; - __syncthreads(); - s_rec[ threadIdx.x ] = 0.0f; - __syncthreads(); - - // calculate normalizer, with writeback - for( unsigned x = 0; x < dst.shape[0]; x += x_size ){ - if( x + threadIdx.x < dst.shape[0] ){ - real_t p = expf( src[ y ][ x + threadIdx.x ] - smax ); - s_rec[ threadIdx.x ] += p; - // write back first, will fetch later - dst[ y ][ x + threadIdx.x ] = p; - } - } - // calculate normalizer - __syncthreads(); - Reduce1D<red::sum,x_bits>( s_rec ); - __syncthreads(); - real_t ssum = s_rec[0]; - - for( unsigned x = 0; x < dst.shape[0]; x += x_size ){ - if( x + threadIdx.x < dst.shape[0] ){ - dst[ y ][ x + threadIdx.x ] /= ssum; - } - } - } - - inline void Softmax( Tensor<gpu,2> &dst, const Tensor<gpu,2> &src ){ - dim3 dimBlock( kBaseThreadNum ); - dim3 dimGrid ( dst.shape[1] ); - utils::Assert( dst.shape == src.shape, "Softmax: shape mismatch" ); - CheckLaunchParam( dimGrid, dimBlock, "Softmax" ); - SoftmaxKernel<kBaseThreadBits><<<dimGrid,dimBlock>>>( dst, src ); - } - }; // namespace cuda -}; // namespace mshadow -#endif // TENSOR_GPU_INL_H http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/cxxnet_op.h ---------------------------------------------------------------------- diff --git a/include/mshadow/cxxnet_op.h b/include/mshadow/cxxnet_op.h deleted file mode 100644 index 1422070..0000000 --- a/include/mshadow/cxxnet_op.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef CXXNET_OP_H -#define CXXNET_OP_H -#pragma once -/*! - * \file cxxnet_op.h - * \brief extra mshadow operation for cxxnet - * \author Bing Xu - */ -#include "mshadow/tensor.h" - -namespace mshadow { - /*! 
\brief operations for algorithm */ - namespace op { - struct sigmoid { - MSHADOW_XINLINE static real_t Map(real_t a) { - return 1.0f / (1.0f + expf(-a)); - } - }; - struct sigmoid_grad { - MSHADOW_XINLINE static real_t Map(real_t a) { - return a * ( 1.0f - a ); - } - }; - - /*! \brief Rectified Linear Operation */ - struct relu { - MSHADOW_XINLINE static real_t Map(real_t a) { - using namespace std; - return max( a, 0.0f ); - } - }; - struct relu_grad { - MSHADOW_XINLINE static real_t Map(real_t a) { - return a > 0.0f ? 1.0f : 0.0f; - } - }; - - struct tanh { - MSHADOW_XINLINE static real_t Map(real_t a) { - return tanhf( a ); - } - }; - struct tanh_grad { - MSHADOW_XINLINE static real_t Map(real_t a) { - return 1.0f - a * a; - } - }; - struct softplus { - MSHADOW_XINLINE static real_t Map(real_t a) { - return logf(1 + expf(a)); - } - }; - struct softplus_grad { - MSHADOW_XINLINE static real_t Map(real_t a) { - return 1.0f / (1.0f + expf(-a)); - } - }; - struct bnll { - MSHADOW_XINLINE static real_t Map(real_t a) { - return a > 0.0f ? a + logf(1.0f + expf(-a)) : logf(1.0f + expf(a)); - } - }; - struct bnll_grad { - MSHADOW_XINLINE static real_t Map(real_t a) { - real_t expval = a > 50.0f ? 50.0f : a; // kBNLL_THRESHOLD = 50.0f - expval = expf(-expval); - return 1.0f / (1.0f + expval); - } - }; - - struct square { - MSHADOW_XINLINE static real_t Map(real_t a) { - return a * a; - } - }; - /*! \brief scaled tanh, hard code the scale factor*/ - struct stanh { - MSHADOW_XINLINE static real_t Map(real_t a) { - return 1.7159047*tanhf(0.66666667 *a); - } - }; - /*! \breif back prop for scaled tanh: */ - struct stanh_grad { - MSHADOW_XINLINE static real_t Map(real_t a) { - return 0.66666667*1.7159047 -0.66666667/1.7159047*a*a; - } - }; - - struct abs{ - MSHADOW_XINLINE static real_t Map(real_t a) { - return a < 0 ? -a : a; - } - }; - - }; //namespace op - -}; //namespace mshadow - -namespace mshadow { - namespace op { - /*! 
\brief used for generate Bernoulli mask */ - struct threshold { - MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { - return a < b ? 1.0f : 0.0f; - } - }; - - /*! \brief used for generate element of power */ - struct power { - MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { - return powf( a, b ); - } - }; - struct sqrtop { - MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { - return sqrt(a+b); - } - }; - struct max { - MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { - return a > b ? a : b; - } - }; - }; // namespace op -}; // namespace mshadow - -#endif // CXXNET_OP_H http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor.h ---------------------------------------------------------------------- diff --git a/include/mshadow/tensor.h b/include/mshadow/tensor.h deleted file mode 100644 index 42d13d3..0000000 --- a/include/mshadow/tensor.h +++ /dev/null @@ -1,472 +0,0 @@ -#ifndef MSHADOW_TENSOR_H -#define MSHADOW_TENSOR_H -/*! - * \file tensor.h - * \brief header file of tensor data structure and functions - * covention: this lib requires explicit memory allocation and de-allocation - * all the data structure Tensor<cpu,1>, Tensor<gpu,1> are like handles(pointers), - * no memory allocation is happening during calculation - * \author Bing Xu, Tianqi Chen - */ -#include "tensor_base.h" -#include "tensor_expr.h" - -namespace mshadow { - /*! - * \brief shape of a tensor - * IMPORTANT NOTE: this shape is different from numpy.shape - * shape[0] gives the lowest dimension, shape[dimension-1] gives the highest dimension - * shape[k] corresponds to k-th dimension of tensor - * \tparam dimension dimension of tensor - */ - template<int dimension> - struct Shape { - public: - /*! \brief maximum dimension of tensor */ - const static int kMaxShape = dimension; - /*! \brief maximum dimension minus 1 */ - const static int kSubShape = dimension - 1; - public: - /*! 
\brief default constructor, do nothing */ - MSHADOW_XINLINE Shape(void) {} - /*! \brief constuctor */ - MSHADOW_XINLINE Shape( const Shape<dimension> &s ){ - #pragma unroll - for( int i = 0; i < kMaxShape; ++i ){ - this->shape_[i] = s[i]; - } - this->stride_ = s.stride_; - } - /*! - * \brief get corresponding index - * \param idx dimension index - * \return the corresponding dimension size - */ - MSHADOW_XINLINE index_t& operator[](index_t idx) { - return shape_[ idx ]; - } - /*! - * \brief get corresponding index - * \param idx dimension index - * \return the corresponding dimension size - */ - MSHADOW_XINLINE const index_t& operator[](index_t idx) const { - return shape_[ idx ]; - } - /*! \return whether two shape equals */ - MSHADOW_XINLINE bool operator==(const Shape<kMaxShape> &s) const { - #pragma unroll - for ( int i = 0; i < kMaxShape; ++i ) { - if (s.shape_[i] != this->shape_[i]) return false; - } - return true; - } - /*! - * flatten the higher dimension to second dimension, return a 2D shape - * \return the flat 2d shape - */ - MSHADOW_XINLINE Shape<2> FlatTo2D(void) const { - Shape<2> s; - s.stride_ = this->stride_; - s.shape_[ 0 ] = this->shape_[ 0 ]; - index_t ymax = 1; - - #pragma unroll - for (int i = 1; i < kMaxShape; ++i) { - ymax *= this->shape_[ i ]; - } - s.shape_[1] = ymax; - return s; - } - /*! \return number of valid elements */ - MSHADOW_XINLINE size_t Size(void) const{ - size_t memsz = this->shape_[ 0 ]; - #pragma unroll - for (int i = 1; i < kMaxShape; ++i) { - memsz *= this->shape_[ i ]; - } - return memsz; - } - /*! \return memory size, including the aligned x dimension */ - MSHADOW_XINLINE size_t MSize(void) const { - size_t memsz = this->stride_; - #pragma unroll - for (int i = 1; i < kMaxShape; ++i) { - memsz *= this->shape_[ i ]; - } - return memsz; - } - /*! 
- * \return product shape in [dimstart,dimend) - * \param dimstart start dimension - * \param dimend end dimension - */ - MSHADOW_XINLINE index_t ProdShape( int dimstart, int dimend ) const{ - index_t num = 1; - #pragma unroll - for (int i = dimstart; i < dimend; ++i) { - num *= this->shape_[ i ]; - } - return num; - } - /*! - * \brief get subshape - * \return subshape - */ - MSHADOW_XINLINE Shape<kSubShape> SubShape(void) const { - Shape<kSubShape> s; - s.stride_ = this->stride_; - // for cuda - #pragma unroll - for (int i = 0; i < kSubShape; ++i) { - s.shape_[ i ] = this->shape_[ i ]; - } - return s; - } - - public: - /*! \brief storing the dimension information */ - index_t shape_[ kMaxShape ]; - /*! - * \brief storing the stride information in x dimension - * this is used to deal with pitch allocation in gpu or sse(align x dimension to 64bit) for efficiency - */ - index_t stride_; - }; - // useful construction functions to generate shape - /*! - * \brief construct a one dimension shape, stride will equal s0 - * \param s0 size of dimension 0 - * \return the shape construction - */ - MSHADOW_XINLINE Shape<1> Shape1( index_t s0 ){ - Shape<1> s; s[0] = s0; s.stride_ = s0; - return s; - } - /*! - * \brief construct a two dimension shape, stride will equal s0 - * \param s1 size of dimension 1 - * \param s0 size of dimension 0 - * \return the shape construction - */ - MSHADOW_XINLINE Shape<2> Shape2( index_t s1, index_t s0 ){ - Shape<2> s; s[0] = s0; s[1] = s1; s.stride_ = s0; - return s; - } - /*! - * \brief construct a three dimension shape, stride will equal s0 - * \param s2 size of dimension 2 - * \param s1 size of dimension 1 - * \param s0 size of dimension 0 - * \return the shape construction - */ - MSHADOW_XINLINE Shape<3> Shape3( index_t s2, index_t s1, index_t s0 ){ - Shape<3> s; - s[0] = s0; s[1] = s1; s[2] = s2; s.stride_ = s0; - return s; - } - /*! 
- * \brief construct a four dimension shape, stride will equal s0 - * \param s3 size of dimension 3 - * \param s2 size of dimension 2 - * \param s1 size of dimension 1 - * \param s0 size of dimension 0 - * \return the shape construction - */ - MSHADOW_XINLINE Shape<4> Shape4( index_t s3, index_t s2, index_t s1, index_t s0 ){ - Shape<4> s; - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; s.stride_ = s0; - return s; - } -}; // namespace mshadow - -namespace mshadow { - /*! \brief device name CPU */ - struct cpu { - /*! \brief whether this device is CPU or not */ - const static bool kDevCPU = true; - /*! \brief device flag number, identifies this device */ - const static int kDevMask = 1<<0; - }; - /*! \brief device name CPU */ - struct gpu { - /*! \brief whether this device is CPU or not */ - const static bool kDevCPU = false; - /*! \brief device flag number, identifies this device */ - const static int kDevMask = 1<<1; - }; - - // more compact template - /*! - * \brief general tensor - * \tparam Device which device the tensor is on - * \tparam dimension dimension of the tensor - */ - template<typename Device, int dimension> - struct Tensor: public expr::ContainerExp< Tensor<Device,dimension> >{ - public: - /*! \brief whether current type lies in cpu */ - const static bool kDevCPU = Device::kDevCPU; - /*! \brief dimension of subtype */ - const static int kSubdim = dimension - 1; - - public: - /*! \brief pointer to the data */ - real_t *dptr; - /*! \brief shape of the tensor */ - Shape<dimension> shape; - public: - /*! \brief default constructor */ - MSHADOW_XINLINE Tensor(void) {} - /*! \brief constructor from shape */ - MSHADOW_XINLINE Tensor(const Shape<dimension> &shape): shape(shape) {} - /*! \brief constructor from data pointer and shape */ - MSHADOW_XINLINE Tensor(real_t *dptr, const Shape<dimension> &shape): dptr((real_t*)dptr), shape(shape) {} - /*! 
- * \brief flatten the tensor to 2 dimension, collapse the higher dimensions together - * \return tensor after flatten - */ - MSHADOW_XINLINE Tensor<Device, 2> FlatTo2D(void) const { - return Tensor<Device, 2>(reinterpret_cast<real_t*> \ - (dptr), shape.FlatTo2D()); - } - /*! - * \brief get a element of dimension - 1 - * \param idx index - * \return the result tensor - */ - MSHADOW_XINLINE Tensor<Device, kSubdim> operator[](index_t idx) const { - Shape<kSubdim> s = shape.SubShape(); - return Tensor<Device, kSubdim>(reinterpret_cast<real_t*> \ - (dptr) + s.MSize() * idx, s); - } - /*! - * \brief slice the tensor in highest dimension [begin,end) - * \param begin begin position of slice - * \param end end position of slice - * \return tensor after slice - */ - MSHADOW_XINLINE Tensor<Device, dimension> Slice(index_t begin, index_t end) const { - Shape<dimension> s = this->shape; - s[ dimension - 1 ] = end - begin; - return Tensor<Device, dimension>(reinterpret_cast<real_t*>\ - (dptr) + s.SubShape().MSize() * begin, s); - } - public: - /*!\brief functions to fit expression template */ - inline Tensor<Device,dimension>& operator=( real_t s ){ - return this->__assign( s ); - } - /*!\brief functions to fit expression template */ - template<typename E> - inline Tensor<Device,dimension>& operator=( const expr::Exp<E,expr::type::kMapper> &exp ){ - return this->__assign( exp ); - } - /*!\brief functions to fit expression template */ - template<typename E> - inline Tensor<Device,dimension>& operator=( const expr::Exp<E,expr::type::kComplex> &exp ){ - return this->__assign( exp ); - } - }; - - /* - * respecialized class Tensor1D,thei is due to different implementation in operator[] - */ - template<typename Device> - struct Tensor<Device,1>: public expr::ContainerExp< Tensor<Device,1> >{ - public: - real_t *dptr; - Shape<1> shape; - public: - MSHADOW_XINLINE Tensor(void) {} - MSHADOW_XINLINE Tensor(const Shape<1> &shape): shape(shape) {} - MSHADOW_XINLINE Tensor(real_t *dptr, 
Shape<1> shape) :dptr(dptr), shape(shape) {} - - MSHADOW_XINLINE Tensor<Device, 2> FlatTo2D(void) const { - return Tensor<Device, 2>(reinterpret_cast<real_t*> \ - (dptr), shape.FlatTo2D()); - } - MSHADOW_XINLINE Tensor<Device, 1> Slice(index_t begin, index_t end) const { - Shape<1> s; - s[0] = s.stride_ = end - begin; - return Tensor<Device, 1>(reinterpret_cast<real_t*> \ - (dptr) + begin, s); - } - MSHADOW_XINLINE real_t &operator[](index_t idx) { return dptr[ idx ]; } - MSHADOW_XINLINE const real_t &operator[](index_t idx)const { return dptr[ idx ]; } - public: - // functions to fit expression template - inline Tensor<Device,1>& operator=( double s ){ - return this->__assign( s ); - } - template<typename E> - inline Tensor<Device,1>& operator=( const expr::Exp<E,expr::type::kMapper> &exp ){ - return this->__assign( exp ); - } - template<typename E> - inline Tensor<Device,1>& operator=( const expr::Exp<E,expr::type::kComplex> &exp ){ - return this->__assign( exp ); - } - }; -}; // namespace mshadow - -// add unroll loops for the shape -namespace mshadow { - // function declarations - /*! - * \brief initialize tensor engine, used to call intialization functions of dependent libs - * this function should be called before all GPU tensor operations, - * for using tensors in CPU, this call is actually not needed - * \param device_id GPU device id to be choosed - */ - inline void InitTensorEngine( int device_id=0 ); - /*! - * \brief Shutdown tensor engine, - * this function should be called after all GPU tensor operations, - * for using tensors in CPU, this call is actually not needed - */ - inline void ShutdownTensorEngine( void ); - - /*! 
- * \brief CPU/CPU: allocate space for CTensor, according to the shape in the obj - * this function is responsible to set the stride_ in each obj.shape - * \tparam dim specify the dim of tensor - * \param obj the tensor object, with shape specified - * \param pad whether padding dimension 0, to make last dimension aligned, - * padding may help improve efficiency of matrix multiplications - * if true, will allocate space with stride_ that may not equals shape[0] - * if false, will allocate continuous space - */ - template<int dim> - inline void AllocSpace(Tensor<cpu,dim> &obj, bool pad = MSHADOW_ALLOC_PAD); - /*! \brief refer to comment of cpu ver \sa AllocSpace */ - template<int dim> - inline void AllocSpace(Tensor<gpu,dim> &obj, bool pad = MSHADOW_ALLOC_PAD); - - /*! - * \brief CPU/GPU: free the space of tensor, will set obj.dptr to NULL - * \tparam dim specify the dim of tensor - * \param obj the tensor object - */ - template<int dim> - inline void FreeSpace(Tensor<cpu,dim> &obj); - /*! \brief refer to comment of cpu ver \sa FreeSpace */ - template<int dim> - inline void FreeSpace(Tensor<gpu,dim> &obj); - - /*! - * \brief CPU/GPU: short cut to allocate and initialize a Tensor - * \tparam Device device of tensor - * \tparam dim dimention of tensor - * \param shape: shape of tensor - * \param initv: initialization value - * \param pad : padding option - * \sa AllocSpace - */ - template<typename Device, int dim> - inline Tensor<Device,dim> NewTensor(const Shape<dim> &shape, real_t initv, bool pad = MSHADOW_ALLOC_PAD); - - /*! - * \brief copy data from one tensor to another, with same shape - * \tparam dim specify the dim of tensor - * \param dst target tensor - * \param src source tensor - */ - template<int dim> - inline void Copy(Tensor<cpu,dim> dst, const Tensor<cpu,dim> &src ); - /*! \brief refer to comment of cpu ver \sa Copy */ - template<int dim> - inline void Copy(Tensor<cpu,dim> dst, const Tensor<gpu,dim> &src ); - /*! 
\brief refer to comment of cpu ver \sa Copy */ - template<int dim> - inline void Copy(Tensor<gpu,dim> dst, const Tensor<cpu,dim> &src ); - /*! \brief refer to comment of cpu ver \sa Copy */ - template<int dim> - inline void Copy(Tensor<gpu,dim> dst, const Tensor<gpu,dim> &src ); - - - /*! - * \brief CPU/GPU: normalize softmax: dst[i][j] = exp( energy[i][j] ) /( sum_j exp( energy[i][j] ) ) - * \param dst destination - * \param energy input energy - */ - inline void Softmax( Tensor<cpu,2> dst, const Tensor<cpu,2> &energy ); - /*! \brief refer to comment of cpu ver \sa Softmax */ - inline void Softmax( Tensor<gpu,2> dst, const Tensor<gpu,2> &energy ); - -}; // namespace mshadow - - -namespace mshadow{ - // function declarations to support expression, no need to understand them - // these functions do not need to be directly used - - /*! - * \brief CPU/GPU: map a expression to a tensor, this function calls MapPlan - * \tparam Saver specify storage method - * \tparam dim dim of the tensor, during usage, there is no need to specify this parameter - * \tparam E specifies the expression type, not need to specify this parameter during usage - * \tparam etype expression type - * \param dst destination - * \param exp expression - * \sa namespace mshadow:sv, mshadow::op, mshadow::expr - */ - template<typename Saver, int dim, typename E, int etype> - inline void MapExp(Tensor<cpu,dim> dst, const expr::Exp<E,etype> &exp ); - /*! \brief refer to comment of cpu ver \sa MapExp */ - template<typename Saver, int dim, typename E, int etype> - inline void MapExp(Tensor<gpu,dim> dst, const expr::Exp<E,etype> &exp ); - - /*! 
- * \brief CPU/GPU: map a expression, do reduction to 1D Tensor in lowest dimension (dimension 0) - * \tparam Saver specify storage method - * \tparam Reducer specify a reducer method - * \tparam E specifies the expression type, not need to specify this parameter during usage - * \tparam etype expression type - * \param dst destination - * \param exp expression - * \param scale scale the result before save - * \sa namespace mshadow:sv, mshadow::op, mshadow::red, mshadow::expr - */ - template<typename Saver, typename Reducer, typename E, int etype> - inline void MapReduceKeepLowest( Tensor<cpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale = 1.0f ); - /*! \brief refer to comment of cpu ver \sa MapReduceKeepLowest */ - template<typename Saver, typename Reducer, typename E, int etype> - inline void MapReduceKeepLowest( Tensor<gpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale = 1.0f ); - - - /*! - * \brief CPU/GPU: map a expression, do reduction to 1D Tensor in third dimension (dimension 2) - * \tparam Saver specify storage method - * \tparam Reducer specify a reducer method - * \tparam E specifies the expression type, not need to specify this parameter during usage - * \tparam dimkeep the target dimension to be kept, should be larger than 0, for 0, use MapReduceKeepLowest - * \tparam etype expression type - * \param dst destination - * \param exp expression - * \param scale scale the result before save - * \sa namespace mshadow:sv, mshadow::op, mshadow::red, mshadow::expr - */ - template<typename Saver, typename Reducer, int dimkeep, typename E, int etype> - inline void MapReduceKeepHighDim( Tensor<cpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale = 1.0f ); - /*! 
\brief refer to comment of cpu ver \sa MapReduceKeepHighDim */ - template<typename Saver, typename Reducer, int dimkeep, typename E, int etype> - inline void MapReduceKeepHighDim( Tensor<gpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale = 1.0f ); - -};// namespace mshadow - -// execution implementation of expression evaluations -#include "tensor_expr_engine-inl.hpp" -// cpu implementation of functions -#include "tensor_cpu-inl.hpp" -// gpu implementation of functions -#include "tensor_gpu-inl.hpp" -// extension of expressions -#include "tensor_expr_ext.h" -// io -#include "tensor_io.h" -// container -#include "tensor_container.h" -// random number generator -#include "tensor_random.h" -#endif // TENSOR_H http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_base.h ---------------------------------------------------------------------- diff --git a/include/mshadow/tensor_base.h b/include/mshadow/tensor_base.h deleted file mode 100644 index b251cba..0000000 --- a/include/mshadow/tensor_base.h +++ /dev/null @@ -1,298 +0,0 @@ -#ifndef MSHADOW_TENSOR_BASE_H -#define MSHADOW_TENSOR_BASE_H -/*! - * \file tensor_base.h - * \brief definitions of base types, macros functions - * - * \author Bing Xu, Tianqi Chen - */ -#include <cmath> -#include <cstdio> -#include <cfloat> -#include <climits> -#include <algorithm> -// macro defintiions - -/*!\brief if this macro is define to be 1, mshadow should compile without any of other libs */ -#ifndef MSHADOW_STAND_ALONE - #define MSHADOW_STAND_ALONE 0 -#endif - -/*! \brief whether do padding during allocation */ -#ifndef MSHADOW_ALLOC_PAD - #define MSHADOW_ALLOC_PAD true -#endif - -/*! 
- * \brief x dimension of data must be bigger pad_size * ratio to be alloced padded memory, otherwise use tide allocation - * for example, if pad_ratio=2, GPU memory alignement size is 32, then we will only allocate padded memory if x dimension > 64 - * set it to 0 then we will always allocate padded memory - */ -#ifndef MSHADOW_MIN_PAD_RATIO - #define MSHADOW_MIN_PAD_RATIO 2 -#endif - -#if MSHADOW_STAND_ALONE - #define MSHADOW_USE_CBLAS 0 - #define MSHADOW_USE_MKL 0 - #define MSHADOW_USE_CUDA 0 -#endif - -/*! \brief use CBLAS for CBLAS */ -#ifndef MSHADOW_USE_CBLAS - #define MSHADOW_USE_CBLAS 0 -#endif -/*! \brief use MKL for BLAS */ -#ifndef MSHADOW_USE_MKL - #define MSHADOW_USE_MKL 1 -#endif -/*! \brief use CUDA support, must ensure that the cuda include path is correct, or directly compile using nvcc */ -#ifndef MSHADOW_USE_CUDA - #define MSHADOW_USE_CUDA 1 -#endif -/*! \brief use single precition float */ -#ifndef MSHADOW_SINGLE_PRECISION - #define MSHADOW_SINGLE_PRECISION 1 -#endif -/*! \brief whether use SSE */ -#ifndef MSHADOW_USE_SSE - #define MSHADOW_USE_SSE 1 -#endif -/*! \brief whether use NVML to get dynamic info */ -#ifndef MSHADOW_USE_NVML - #define MSHADOW_USE_NVML 0 -#endif -// SSE is conflict with cudacc -#ifdef __CUDACC__ - #undef MSHADOW_USE_SSE - #define MSHADOW_USE_SSE 0 -#endif - -#if MSHADOW_USE_CBLAS -extern "C"{ - #include <cblas.h> -} -#elif MSHADOW_USE_MKL - #include <mkl.h> - #include <mkl_cblas.h> - #include <mkl_vsl.h> - #include <mkl_vsl_functions.h> -#endif - -#if MSHADOW_USE_CUDA - #include <cublas.h> - #include <curand.h> -#endif - -#if MSHADOW_USE_NVML - #include <nvml.h> -#endif -// -------------------------------- -// MSHADOW_XINLINE is used for inlining template code for both CUDA and CPU code. 
// --------------------------------
// MSHADOW_XINLINE: force-inline qualifier usable from both host and device code.
#ifdef MSHADOW_XINLINE
  #error "MSHADOW_XINLINE must not be defined"
#endif
#ifdef __CUDACC__
  #define MSHADOW_XINLINE inline __attribute__((always_inline)) __device__ __host__
#else
  #define MSHADOW_XINLINE inline __attribute__((always_inline))
#endif
/*! \brief force-inline qualifier for host-only code */
#define MSHADOW_CINLINE inline __attribute__((always_inline))

// MSHADOW_CONSTEXPR expands to constexpr when the compiler advertises C++11, else to const.
#if defined(__GXX_EXPERIMENTAL_CXX0X) || defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L
  #define MSHADOW_CONSTEXPR constexpr
#else
  #define MSHADOW_CONSTEXPR const
#endif

/*! \brief basic scalar types and constants of mshadow */
namespace mshadow {
    /*! \brief number of values buffered by each random number generator */
    const unsigned kRandBufferSize = 1000000;
    /*! \brief single-precision approximation of pi */
    const float kPi = 3.1415926f;

#if MSHADOW_SINGLE_PRECISION
    /*! \brief element type stored in tensors */
    typedef float real_t;
#else
    /*! \brief element type stored in tensors */
    typedef double real_t;
#endif
    /*! \brief type used for indices and extents */
    typedef unsigned index_t;
}  // namespace mshadow

namespace mshadow {
    /*! \brief element-wise binary operators; each exposes a static Map(lhs, rhs) */
    namespace op {
        /*! \brief product: lhs * rhs */
        struct mul {
            /*! \brief \return lhs * rhs */
            MSHADOW_XINLINE static real_t Map(real_t lhs, real_t rhs) {
                return lhs * rhs;
            }
        };
        /*! \brief sum: lhs + rhs */
        struct plus {
            /*! \brief \return lhs + rhs */
            MSHADOW_XINLINE static real_t Map(real_t lhs, real_t rhs) {
                return lhs + rhs;
            }
        };
        /*! \brief difference: lhs - rhs */
        struct minus {
            /*! \brief \return lhs - rhs */
            MSHADOW_XINLINE static real_t Map(real_t lhs, real_t rhs) {
                return lhs - rhs;
            }
        };
        /*! \brief quotient: lhs / rhs */
        struct div {
            /*! \brief \return lhs / rhs */
            MSHADOW_XINLINE static real_t Map(real_t lhs, real_t rhs) {
                return lhs / rhs;
            }
        };
        /*! \brief selects the right-hand operand and ignores the left one */
        struct right {
            /*! \brief \return rhs */
            MSHADOW_XINLINE static real_t Map(real_t lhs, real_t rhs) {
                return rhs;
            }
        };
    }  // namespace op

    /*! \brief savers: how a computed value is combined into its destination */
    namespace sv {
        /*! \brief plain assignment: dst = val */
        struct saveto {
            /*! \brief store val into dst */
            MSHADOW_XINLINE static void Save(real_t& dst, real_t val) {
                dst = val;
            }
            /*! \brief alpha coefficient used when this saver is mapped onto a BLAS call */
            MSHADOW_CONSTEXPR static real_t kAlphaBLAS = 1.0f;
            /*! \brief beta coefficient used when this saver is mapped onto a BLAS call */
            MSHADOW_CONSTEXPR static real_t kBetaBLAS = 0.0f;
            /*! \brief binary operator equivalent to this saver */
            typedef op::right OPType;
        };
        /*! \brief accumulate: dst += val */
        struct plusto {
            /*! \brief add val onto dst */
            MSHADOW_XINLINE static void Save(real_t& dst, real_t val) {
                dst += val;
            }
            /*! \brief alpha coefficient used when this saver is mapped onto a BLAS call */
            MSHADOW_CONSTEXPR static real_t kAlphaBLAS = 1.0f;
            /*! \brief beta coefficient used when this saver is mapped onto a BLAS call */
            MSHADOW_CONSTEXPR static real_t kBetaBLAS = 1.0f;
            /*! \brief binary operator equivalent to this saver */
            typedef op::plus OPType;
        };
        /*! \brief subtract: dst -= val */
        struct minusto {
            /*! \brief subtract val from dst */
            MSHADOW_XINLINE static void Save(real_t& dst, real_t val) {
                dst -= val;
            }
            /*! \brief alpha coefficient used when this saver is mapped onto a BLAS call */
            MSHADOW_CONSTEXPR static real_t kAlphaBLAS = -1.0f;
            /*! \brief beta coefficient used when this saver is mapped onto a BLAS call */
            MSHADOW_CONSTEXPR static real_t kBetaBLAS = 1.0f;
            /*! \brief binary operator equivalent to this saver */
            typedef op::minus OPType;
        };
        /*! \brief scale: dst *= val */
        struct multo {
            /*! \brief multiply dst by val */
            MSHADOW_XINLINE static void Save(real_t& dst, real_t val) {
                dst *= val;
            }
            /*! \brief binary operator equivalent to this saver */
            typedef op::mul OPType;
        };
        /*! \brief divide: dst /= val */
        struct divto {
            /*! \brief divide dst by val */
            MSHADOW_XINLINE static void Save(real_t& dst, real_t val) {
                dst /= val;
            }
            /*! \brief binary operator equivalent to this saver */
            typedef op::div OPType;
        };
    }  // namespace sv

    namespace op {
        // Example of a unary operator. User code can define more in the same shape
        // and apply them with F<op::identity>( src ).
        /*! \brief maps a value onto itself */
        struct identity {
            /*! \brief \return val unchanged */
            MSHADOW_XINLINE static real_t Map(real_t val) {
                return val;
            }
        };
    }  // namespace op

    /*! \brief reducers: fold a sequence of values into one */
    namespace red {
        /*! \brief additive reduction */
        struct sum {
            /*! \brief fold src into dst by addition */
            MSHADOW_XINLINE static void Reduce( volatile real_t& dst, volatile real_t src ) {
                dst += src;
            }
            /*! \brief derivative of the reduced result with respect to one reduced element; always 1 for a sum */
            MSHADOW_XINLINE static real_t PartialGrad( real_t redres, real_t redsrc ) {
                return 1.0f;
            }
            /*! \brief neutral starting value of the reduction */
            MSHADOW_CONSTEXPR static real_t kInitV = 0.0f;
        };
        /*! \brief maximum reduction */
        struct maximum {
            /*! \brief keep the larger of dst and src in dst */
            MSHADOW_XINLINE static void Reduce( volatile real_t& dst, volatile real_t src ) {
                using namespace std;
                dst = max( dst, src );
            }
            /*! \brief derivative of the reduced result with respect to one reduced element: 1 where the element equals the result, else 0 */
            MSHADOW_XINLINE static real_t PartialGrad( real_t redres, real_t redsrc ) {
                if( redres == redsrc ) return 1.0f;
                return 0.0f;
            }
            /*! \brief neutral starting value: the most negative finite value of real_t */
#if MSHADOW_SINGLE_PRECISION
            MSHADOW_CONSTEXPR static real_t kInitV = -FLT_MAX;
#else
            MSHADOW_CONSTEXPR static real_t kInitV = -DBL_MAX;
#endif
        };
    }  // namespace red
}  // namespace mshadow
\brief send error message then exit */ - inline void Error( const char *msg ){ - fprintf( stderr, "Error:%s\n",msg ); - exit( -1 ); - } - /*! \brief assert a expression is true */ - inline void Assert( bool exp ){ - if( !exp ) Error( "AssertError" ); - } - /*! \brief assert a expression is true */ - inline void Assert( bool exp, const char *msg ){ - if( !exp ) Error( msg ); - } - /*! \brief warning */ - inline void Warning( const char *msg ){ - fprintf( stderr, "warning:%s\n",msg ); - } - }; // namespace utils -}; // namespace mshadow -#endif // TENSOR_BASE_H http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_container.h ---------------------------------------------------------------------- diff --git a/include/mshadow/tensor_container.h b/include/mshadow/tensor_container.h deleted file mode 100644 index f0699e7..0000000 --- a/include/mshadow/tensor_container.h +++ /dev/null @@ -1,152 +0,0 @@ -#ifndef MSHADOW_TENSOR_CONTAINER_H -#define MSHADOW_TENSOR_CONTAINER_H -/*! - * \file tensor_container.h - * \brief tensor container that does memory allocation and resize like STL - * \author Tianqi Chen - */ -#include "tensor.h" -#include "tensor_io.h" - -namespace mshadow{ - /*! - * \brief tensor container that does memory allocation and resize like STL, - * use it to save the lines of FreeSpace in class. - * Do not abuse it, efficiency can come from pre-allocation and no re-allocation - * - * \tparam Device which device the tensor is on - * \tparam dimension dimension of the tensor - */ - template<typename Device, int dimension> - class TensorContainer: public Tensor<Device,dimension>{ - public: - /*! - * \brief constructor - * \param pad whether use padding alignment in space allocation - */ - TensorContainer( bool pad = MSHADOW_ALLOC_PAD ){ - this->pad_ = pad; - this->dptr = data_.dptr = NULL; - this->shape[0] = 0; - this->shape.stride_ = 0; - this->data_.shape.stride_ = 0; - this->data_.shape[1] = 0; - } - /*! 
- * \brief constructor - * \param shape intial shape - */ - TensorContainer( const Shape<dimension> &shape ){ - this->pad_ = MSHADOW_ALLOC_PAD; - data_.dptr = NULL; - this->AllocByShape( shape ); - } - /*! - * \brief constructor - * \param shape intial shape - * \param initv intial value - */ - TensorContainer( const Shape<dimension> &shape, real_t initv ){ - this->pad_ = MSHADOW_ALLOC_PAD; - data_.dptr = NULL; - this->AllocByShape( shape ); - (*this) = initv; - } - ~TensorContainer( void ){ - this->FreeSpace(); - } - /*! - * \brief resize the container to given shape, content is NOT preserved - * \param shape target shape - */ - inline void Resize( const Shape<dimension> &shape ){ - Shape<2> s2 = shape.FlatTo2D(); - if( s2.shape_[0] > data_.shape.stride_ || s2.shape_[1] > data_.shape[1] ){ - this->AllocByShape( shape ); - }else{ - this->shape = shape; - if( this->pad_ ){ - this->shape.stride_ = data_.shape.stride_; - }else{ - this->shape.stride_ = this->shape[ 0 ]; - } - } - } - /*! - * \brief resize the container to given shape, and initialize, content is NOT preserved - * \param shape target shape - * \param initv initialization value - */ - inline void Resize( const Shape<dimension> &shape, real_t initv ){ - this->Resize( shape ); - (*this) = initv; - } - /*! \brief set whether padding is allowed in tensor */ - inline void set_pad( bool pad ){ - this->pad_ = pad; - } - /*! - * \brief save by binary format - * \param fo output binary stream - * \tparam TStream type of stream, need to support Read, Write, one example is utils::IStream. - */ - template<typename TStream> - inline void SaveBinary( TStream &fo ) const{ - mshadow::SaveBinary( fo, *this ); - } - /*! - * \brief load by binary format, a temp Tensor<cpu,dim> storage will be allocated - * \param fi input binary stream - * \tparam TStream type of stream, need to support Read, Write, one example is utils::IStream. 
- */ - template<typename TStream> - inline void LoadBinary( TStream &fi ) { - Tensor<cpu,dimension> tmp; - mshadow::LoadBinary( fi, tmp, false ); - this->Resize( tmp.shape ); - Copy( *this, tmp ); - mshadow::FreeSpace( tmp ); - } - public: - // functions to fit exp template - inline Tensor<Device,dimension>& operator=( real_t s ){ - return this->__assign( s ); - } - template<typename E> - inline Tensor<Device,dimension>& operator=( const expr::Exp<E,expr::type::kMapper> &exp ){ - return this->__assign( exp ); - } - template<typename E> - inline Tensor<Device,dimension>& operator=( const expr::Exp<E,expr::type::kComplex> &exp ){ - return this->__assign( exp ); - } - private: - /*! \brief whether we do padding in the space */ - bool pad_; - /*! \brief the shape of data_ is actually current data space */ - Tensor<Device, 2> data_; - private: - inline void FreeSpace (void){ - if( data_.dptr != NULL ){ - mshadow::FreeSpace( data_ ); - data_.dptr = this->dptr = NULL; - } - } - inline void AllocByShape (const Shape<dimension>& shape){ - if( data_.dptr != NULL ){ - this->FreeSpace(); - } - data_.shape = shape.FlatTo2D(); - mshadow::AllocSpace( data_, pad_ ); - this->dptr = data_.dptr; - this->shape = shape; - if( this->pad_ ){ - this->shape.stride_ = data_.shape.stride_; - }else{ - this->shape.stride_ = shape[0]; - } - } - }; -};// namespace mshadow - -#endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_cpu-inl.hpp ---------------------------------------------------------------------- diff --git a/include/mshadow/tensor_cpu-inl.hpp b/include/mshadow/tensor_cpu-inl.hpp deleted file mode 100644 index 0fa3cfa..0000000 --- a/include/mshadow/tensor_cpu-inl.hpp +++ /dev/null @@ -1,168 +0,0 @@ -#ifndef MSHADOW_TENSOR_CPU_INL_HPP -#define MSHADOW_TENSOR_CPU_INL_HPP -/*! 
- * \file tensor_cpu-inl.hpp - * \brief implementation of CPU host code - * \author Bing Xu, Tianqi Chen - */ -#include <cstring> -#include "tensor_base.h" -#include "tensor_sse-inl.hpp" - -namespace mshadow { - template<int dim> - inline void AllocSpace(Tensor<cpu,dim> &obj, bool pad ){ - size_t pitch; - if( pad ){ - obj.dptr = (real_t*)sse2::AlignedMallocPitch - ( pitch, obj.shape[0] * sizeof(real_t), obj.FlatTo2D().shape[1] ); - obj.shape.stride_ = static_cast<index_t>( pitch / sizeof(real_t) ); - }else{ - obj.shape.stride_ = obj.shape[0]; - obj.dptr = (real_t*)sse2::AlignedMallocPitch - ( pitch, obj.shape.Size() * sizeof(real_t), 1 ); - } - } - - template<typename Device, int dim> - inline Tensor<Device,dim> NewTensor(const Shape<dim> &shape, real_t initv, bool pad ){ - Tensor<Device, dim> obj( shape ); - AllocSpace( obj, pad ); - MapExp<sv::saveto>( obj, expr::ScalarExp( initv ) ); - return obj; - } - - template<int dim> - inline void FreeSpace(Tensor<cpu,dim> &obj){ - sse2::AlignedFree( obj.dptr ); - obj.dptr = NULL; - } - - template<int dim> - inline void Copy(Tensor<cpu,dim> _dst, const Tensor<cpu,dim> &_src ){ - utils::Assert( _dst.shape == _src.shape, "Copy:shape mismatch" ); - Tensor<cpu,2> dst = _dst.FlatTo2D(); - Tensor<cpu,2> src = _src.FlatTo2D(); - for (index_t y = 0; y < dst.shape[1]; ++y ) { - memcpy( dst[y].dptr, src[y].dptr, sizeof(real_t) * dst.shape[0] ); - } - } - - template<typename Saver, typename E, int dim> - inline void MapPlan(Tensor<cpu,dim> _dst, const expr::Plan<E> &plan){ - Tensor<cpu,2> dst = _dst.FlatTo2D(); - for (index_t y = 0; y < dst.shape[1]; ++y ) { - for (index_t x = 0; x < dst.shape[0]; ++x ) { - // trust your compiler! 
-_- they will optimize it - Saver::Save(dst[y][x], plan.Eval( y, x ) ); - } - } - } - - // code to handle SSE optimization - template<bool pass_check,typename Saver, int dim, typename E, int etype> - struct MapExpCPUEngine; - template<typename SV, int dim, typename E, int etype> - struct MapExpCPUEngine<false,SV,dim,E,etype>{ - inline static void Map(Tensor<cpu,dim> dst, const expr::Exp<E,etype> &exp ){ - MapPlan<SV>( dst, MakePlan( exp.self() ) ); - } - }; - - #if MSHADOW_USE_SSE - template<typename SV, int dim, typename E, int etype> - struct MapExpCPUEngine<true,SV,dim,E,etype>{ - inline static void Map(Tensor<cpu,dim> dst, const expr::Exp<E,etype> &exp ){ - using namespace expr; - if( SSEAlignCheck<dim,E>::Check( exp.self() ) && SSEAlignCheck< dim,Tensor<cpu,dim> >::Check(dst) ){ - MapSSEPlan<SV>( dst, MakeSSEPlan( exp.self() ) ); - }else{ - MapPlan<SV>( dst, MakePlan( exp.self() ) ); - } - } - }; - #endif - - template<typename Saver, int dim, typename E, int etype> - inline void MapExp(Tensor<cpu,dim> dst, const expr::Exp<E,etype> &exp ){ - using namespace expr; - TypeCheckPass< TypeCheck<cpu,dim,E>::kMapPass >::Error_All_Tensor_in_Exp_Must_Have_Same_Type(); - Shape<dim> eshape = ShapeCheck<dim,E>::Check( exp.self() ); - utils::Assert( eshape[0] == 0 || eshape == dst.shape, "Assignment: Shape of Tensors in expression is not consistent with target" ); - #if MSHADOW_USE_SSE - MapExpCPUEngine< SSECheck<E>::kPass,Saver,dim,E,etype >::Map( dst, exp ); - #else - MapExpCPUEngine< false,Saver,dim,E,etype >::Map( dst, exp ); - #endif - } - - template<typename Saver, typename Reducer, typename E, int etype> - inline void MapReduceKeepLowest( Tensor<cpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale ){ - using namespace expr; - TypeCheckPass< TypeCheck<cpu,1,E>::kRedPass >::Error_TypeCheck_Not_Pass_For_Reduce_Exp(); - Shape<2> eshape = ShapeCheck< ExpInfo<E>::kDim, E >::Check( exp.self() ).FlatTo2D(); - - utils::Assert( eshape[0] == dst.shape[0], "reduction 
dimension do not match" ); - utils::Assert( eshape[1] != 0, "can not reduce over empty tensor" ); - // execution - expr::Plan<E> plan = MakePlan( exp.self() ); - for( index_t x = 0; x < eshape[0]; ++x ){ - real_t res = plan.Eval( 0, x ); - for( index_t y = 1; y < eshape[1]; ++y ){ - Reducer::Reduce( res, plan.Eval( y, x ) ); - } - Saver::Save( dst[x], res*scale ); - } - } - - template<typename Saver, typename Reducer, int dimkeep, typename E, int etype> - inline void MapReduceKeepHighDim( Tensor<cpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale ){ - using namespace expr; - TypeCheckPass< TypeCheck<cpu,dimkeep,E>::kRedPass >::Error_TypeCheck_Not_Pass_For_Reduce_Exp(); - typedef Shape< ExpInfo<E>::kDim > EShape; - EShape eshape = ShapeCheck< ExpInfo<E>::kDim, E >::Check( exp.self() ); - utils::Assert( eshape[dimkeep] == dst.shape[0], "reduction dimension do not match" ); - // use equvalent form - Shape<4> pshape = Shape4( eshape.ProdShape(dimkeep+1,EShape::kMaxShape), eshape[dimkeep], - eshape.ProdShape(1,dimkeep), eshape[0] ); - - // execution - expr::Plan<E> plan = MakePlan( exp.self() ); - - for( index_t c = 0; c < pshape[2]; ++c ){ - real_t res = Reducer::kInitV; - for( index_t n = 0; n < pshape[3]; ++n ){ - real_t tres = Reducer::kInitV; - for( index_t y = 0; y < pshape[1]; ++y ){ - for( index_t x = 0; x < pshape[0]; ++x ){ - Reducer::Reduce( tres, plan.Eval( (n*pshape[2] + c) * pshape[1] + y, x ) ); - } - } - Reducer::Reduce( res, tres ); - } - Saver::Save( dst[c], res*scale ); - } - } - - inline void Softmax( Tensor<cpu,1> dst, const Tensor<cpu,1>& energy ){ - real_t mmax = energy[0]; - for( real_t x = 1; x < dst.shape[0]; ++x ) - if( mmax < energy[x] ) mmax = energy[x]; - real_t sum = 0.0f; - for( index_t x = 0; x < dst.shape[0]; ++x ){ - dst[x] = std::exp( energy[x] - mmax ); - sum += dst[x]; - } - for( index_t x = 0; x < dst.shape[0]; ++x ){ - dst[x] /= sum; - } - } - inline void Softmax( Tensor<cpu,2> dst, const Tensor<cpu,2>& energy ){ - 
utils::Assert( dst.shape == energy.shape, "Softmax: shape mismatch" ); - for( index_t y = 0; y < dst.shape[1]; ++y ){ - Softmax( dst[y], energy[y] ); - } - } -}; // namespace mshadow - -#endif // TENSOR_CPU_INL_HPP http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_expr.h ---------------------------------------------------------------------- diff --git a/include/mshadow/tensor_expr.h b/include/mshadow/tensor_expr.h deleted file mode 100644 index ac8fde7..0000000 --- a/include/mshadow/tensor_expr.h +++ /dev/null @@ -1,367 +0,0 @@ -#ifndef MSHADOW_TENSOR_EXPR_H -#define MSHADOW_TENSOR_EXPR_H -/*! - * \file tensor_expr.h - * \brief definitions of abstract expressions and expressions template - * \author Tianqi Chen, Bing Xu - */ -#include "tensor_base.h" - -namespace mshadow{ - /*! - * \brief namespace for abstract expressions and expressions template, - * have no dependecy on tensor.h, - * These data structure takes no charge in computations, - * they are only used to define operations and represent expression in a symbolic way - */ - namespace expr{ - - /*! \brief type of expressions */ - namespace type{ - /*! \brief this expression directly correspnds to a data class */ - const int kContainer = 0; - /*! \brief this only contains element-wise vector operations */ - const int kMapper = 1; - /*! \brief othercase: e.g dot product */ - const int kComplex = 3; - }; - - /*! - * \brief expression engine that actually interprets these expressions - * this is a function template that needed to be implemented for specific expressions - */ - template<typename Saver,typename Container> - struct ExpEngine{ - template<typename EType> - inline static void Eval( Container& dst, const EType &exp ); - }; - - template<typename Container> - class ContainerExp; - class ScalarExp; - - /*! 
- * \brief base class for expression - * \tparam SubType inheritated class must put their type into this parameter - * \tparam exp_type expression type, see namespace type - */ - template<typename SubType, int exp_type> - struct Exp{ - public: - /*! \return subtype instance of current class */ - inline const SubType& self( void ) const{ - return *static_cast<const SubType*>(this); - } - /*! \return reference of subtype instance of current class */ - inline SubType& refself( void ){ - return *static_cast<SubType*>(this); - } - }; - - /*! \brief scalar expression */ - struct ScalarExp: public Exp<ScalarExp, type::kMapper>{ - /*! \brief scalar value */ - real_t scalar_; - /*! \brief constructor */ - ScalarExp( real_t scalar ):scalar_(scalar){} - }; - - /*! \brief represent a transpose expression of a container */ - template<typename EType> - struct TransposeExp: public Exp< TransposeExp<EType>, type::kComplex >{ - public: - /*! \brief expression to be transposed */ - const EType &exp; - /*! \brief constructor */ - TransposeExp( const EType &e ):exp(e){} - /*! \brief transpose expression */ - inline const EType & T( void ) const{ - return exp; - } - }; - - /*! - * \brief base class of all variables, that can be assigned to values - * \tparam Container the actually class of data container, e.g. CTensor1D - */ - template<typename Container> - class ContainerExp: public Exp< Container, type::kContainer >{ - public: - /*! - *\brief transpose of a matrix - *\return transpose of current expression - */ - inline const TransposeExp<Container> T( void ) const{ - return TransposeExp<Container>( this->self() ); - } - public: - /*! \brief operator overload */ - inline Container &operator+=( real_t s ){ - ExpEngine<sv::plusto,Container>::Eval( this->refself(), ScalarExp(s) ); - return this->refself(); - } - /*! 
\brief operator overload */ - inline Container &operator-=( real_t s ){ - ExpEngine<sv::minusto,Container>::Eval( this->refself(), ScalarExp(s) ); - return this->refself(); - } - /*! \brief operator overload */ - inline Container &operator*=( real_t s ){ - ExpEngine<sv::multo,Container>::Eval( this->refself(), ScalarExp(s) ); - return this->refself(); - } - /*! \brief operator overload */ - inline Container &operator/=( real_t s ){ - ExpEngine<sv::divto,Container>::Eval( this->refself(), ScalarExp(s) ); - return this->refself(); - } - /*! \brief operator overload */ - inline Container &__assign( real_t s ){ - ExpEngine<sv::saveto,Container>::Eval( this->refself(), ScalarExp(s) ); - return this->refself(); - } - public: - /*! \brief implementation of operator=, note that we can not define container = container */ - template<typename E> - inline Container &__assign( const Exp<E,type::kMapper> &exp ){ - ExpEngine<sv::saveto,Container>::Eval( this->refself(), exp.self() ); - return this->refself(); - } - /*! \brief implementation of operator=, note that we can not define container = container */ - template<typename E> - inline Container &__assign( const Exp<E,type::kComplex> &exp ){ - ExpEngine<sv::saveto,Container>::Eval( this->refself(), exp.self() ); - return this->refself(); - } - /*! \brief implementation of operator+= */ - template<typename E,int etype> - inline Container &operator+=( const Exp<E,etype> &exp ){ - ExpEngine<sv::plusto,Container>::Eval( this->refself(), exp.self() ); - return this->refself(); - } - /*! \brief implementation of operator-= */ - template<typename E,int etype> - inline Container &operator-=( const Exp<E,etype> &exp ){ - ExpEngine<sv::minusto,Container>::Eval( this->refself(), exp.self() ); - return this->refself(); - } - /*! 
\brief implementation of operator*= */ - template<typename E,int etype> - inline Container &operator*=( const Exp<E,etype> &exp ){ - ExpEngine<sv::multo,Container>::Eval( this->refself(), exp.self() ); - return this->refself(); - } - /*! \brief implementation of operator/= */ - template<typename E,int etype> - inline Container &operator/=( const Exp<E,etype> &exp ){ - ExpEngine<sv::divto,Container>::Eval( this->refself(), exp.self() ); - return this->refself(); - } - }; - }; // namespace expr - - namespace expr{ - /*! - * \brief matrix multiplication expression dot( lhs[.T], rhs[.T] ) - * \tparam TA type of lhs - * \tparam TB type of rhs - * \tparam ltrans whether lhs is transposed - * \tparam rtrans whether rhs is transposed - */ - template<typename TA,typename TB,bool ltrans,bool rtrans> - struct DotExp: public Exp< DotExp<TA,TB,ltrans,rtrans>, type::kComplex >{ - /*! \brief left operand */ - const TA& lhs_; - /*! \brief right operand */ - const TB& rhs_; - /*! \brief scale over result */ - real_t scale_; - /*! \brief constructor */ - DotExp( const TA &lhs, const TB &rhs, real_t scale ) - :lhs_(lhs),rhs_(rhs),scale_(scale){} - }; - - /*! \brief dot operator def */ - template<typename TA, typename TB> - inline DotExp<TA,TB,false,false> dot( const ContainerExp<TA> &lhs, const ContainerExp<TB> &rhs ){ - return DotExp<TA,TB,false,false>( lhs.self(), rhs.self(), 1.0f ); - } - /*! \brief dot operator def */ - template<typename TA, typename TB> - inline DotExp<TA,TB,true,false> dot( const TransposeExp<TA> &lhs, const ContainerExp<TB> &rhs ){ - return DotExp<TA,TB,true,false>( lhs.exp, rhs.self(), 1.0f ); - } - /*! \brief dot operator def */ - template<typename TA, typename TB> - inline DotExp<TA,TB,false,true> dot( const ContainerExp<TA> &lhs, const TransposeExp<TB> &rhs ){ - return DotExp<TA,TB,false,true>( lhs.self(), rhs.exp, 1.0f ); - } - /*! 
\brief dot operator def */ - template<typename TA, typename TB> - inline DotExp<TA,TB,true,true> dot( const TransposeExp<TA> &lhs, const TransposeExp<TB> &rhs ){ - return DotExp<TA,TB,true,true>( lhs.exp, rhs.exp, 1.0f ); - } - /*! \brief dot operator def */ - template<typename TA, typename TB, bool ltrans, bool rtrans > - inline DotExp<TA,TB,ltrans,rtrans> operator*( const DotExp<TA,TB,ltrans,rtrans> &lhs, real_t rhs ){ - return DotExp<TA,TB,ltrans,rtrans>( lhs.lhs_, lhs.rhs_, lhs.scale_ * rhs ); - } - /*! \brief scale of dot operation */ - template<typename TA, typename TB, bool ltrans, bool rtrans > - inline DotExp<TA,TB,ltrans,rtrans> operator*( real_t lhs, const DotExp<TA,TB,ltrans,rtrans> &rhs ){ - return DotExp<TA,TB,ltrans,rtrans>( rhs.lhs_, rhs.rhs_, rhs.scale_ * lhs ); - } - }; // namespace expr - - namespace expr{ - /*! - * \brief binary map expression lhs [op] rhs - * \tparam OP operator - * \tparam TA type of lhs - * \tparam TB type of rhs - * \tparam etype expression type, sa namespace::type - */ - template<typename OP, typename TA, typename TB, int etype > - struct BinaryMapExp: public Exp< BinaryMapExp<OP,TA,TB,etype>, etype >{ - /*! \brief left operand */ - const TA& lhs_; - /*! \brief right operand */ - const TB& rhs_; - /*! \brief constructor */ - BinaryMapExp( const TA &lhs, const TB &rhs ) - :lhs_(lhs), rhs_(rhs){} - }; - - /*! \brief make expression */ - template<typename OP,typename TA, typename TB, int ta, int tb> - inline BinaryMapExp<OP,TA,TB, (ta|tb|type::kMapper) > MakeExp( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){ - return BinaryMapExp<OP,TA,TB, (ta|tb|type::kMapper) >( lhs.self(), rhs.self() ); - } - - /*! - * \brief short hand for MakeExp, usage F<op>(lhs, rhs). 
create a binary operation expression - * \param lhs left operand - * \param rhs right operand - * \tparam binary operator - * \tparam TA lhs expression - * \tparam ta lhs expression type - * \tparam TB rhs expression - * \tparam tb rhs expression type - * \sa mshadow::op - */ - template<typename OP,typename TA, typename TB, int ta, int tb> - inline BinaryMapExp<OP,TA,TB, (ta|tb|type::kMapper) > F( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){ - return MakeExp<OP>( lhs, rhs ); - } - /*! \brief operator overload for const */ - template<typename OP,typename TA, int ta> - inline BinaryMapExp<OP,TA,ScalarExp, (ta|type::kMapper) > F( const Exp<TA,ta> &lhs, const ScalarExp &rhs ){ - return MakeExp<OP>( lhs, rhs ); - } - /*! \brief operator overload for const */ - template<typename OP,typename TB, int tb> - inline BinaryMapExp<OP,ScalarExp,TB, (tb|type::kMapper) > F( const ScalarExp &lhs, const Exp<TB,tb>& rhs ){ - return MakeExp<OP>( lhs, rhs ); - } - - // operator rules - /*! \brief operator overload */ - template<typename TA, typename TB, int ta, int tb> - inline BinaryMapExp<op::plus,TA,TB, (ta|tb|type::kMapper) > operator+( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){ - return MakeExp<op::plus>( lhs, rhs ); - } - /*! \brief operator overload */ - template<typename TA, typename TB, int ta, int tb> - inline BinaryMapExp<op::minus,TA,TB, (ta|tb|type::kMapper) > operator-( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){ - return MakeExp<op::minus>( lhs, rhs ); - } - /*! \brief operator overload */ - template<typename TA, typename TB, int ta, int tb> - inline BinaryMapExp<op::mul,TA,TB, (ta|tb|type::kMapper) > operator*( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){ - return MakeExp<op::mul>( lhs, rhs ); - } - /*! 
\brief operator overload */ - template<typename TA, typename TB, int ta, int tb> - inline BinaryMapExp<op::div,TA,TB, (ta|tb|type::kMapper) > operator/( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){ - return MakeExp<op::div>( lhs, rhs ); - } - // constant operators - /*! \brief operator overload */ - template<typename TA, int ta> - inline BinaryMapExp<op::plus, TA, ScalarExp, (ta|type::kMapper) > operator+( const Exp<TA,ta>& lhs, const ScalarExp& rhs ){ - return MakeExp<op::plus>( lhs, rhs ); - } - /*! \brief operator overload */ - template<typename TA, int ta> - inline BinaryMapExp<op::minus, TA, ScalarExp, (ta|type::kMapper) > operator-( const Exp<TA,ta>& lhs, const ScalarExp& rhs ){ - return MakeExp<op::minus>( lhs, rhs ); - } - /*! \brief operator overload */ - template<typename TA, int ta> - inline BinaryMapExp<op::mul, TA, ScalarExp, (ta|type::kMapper) > operator*( const Exp<TA,ta>& lhs, const ScalarExp& rhs ){ - return MakeExp<op::mul>( lhs, rhs ); - } - /*! \brief operator overload */ - template<typename TA, int ta> - inline BinaryMapExp<op::div, TA, ScalarExp, (ta|type::kMapper) > operator/( const Exp<TA,ta>& lhs, const ScalarExp& rhs ){ - return MakeExp<op::div>( lhs, rhs ); - } - // constant operators 2 - /*! \brief operator overload */ - template<typename TB, int tb> - inline BinaryMapExp<op::plus, ScalarExp, TB, (tb|type::kMapper) > operator+( const ScalarExp& lhs, const Exp<TB,tb>& rhs ){ - return MakeExp<op::plus>( lhs, rhs ); - } - /*! \brief operator overload */ - template<typename TB, int tb> - inline BinaryMapExp<op::minus, ScalarExp, TB, (tb|type::kMapper) > operator-( const ScalarExp& lhs, const Exp<TB,tb>& rhs ){ - return MakeExp<op::minus>( lhs, rhs ); - } - /*! \brief operator overload */ - template<typename TB, int tb> - inline BinaryMapExp<op::mul, ScalarExp, TB, (tb|type::kMapper) > operator*( const ScalarExp& lhs, const Exp<TB,tb>& rhs ){ - return MakeExp<op::mul>( lhs, rhs ); - } - /*! 
\brief operator overload */ - template<typename TB, int tb> - inline BinaryMapExp<op::div, ScalarExp, TB, (tb|type::kMapper) > operator/( const ScalarExp& lhs, const Exp<TB,tb>& rhs ){ - return MakeExp<op::div>( lhs, rhs ); - } - }; - - namespace expr{ - /*! - * \brief unary map expression op(src) - * \tparam OP operator - * \tparam TA type of src - * \tparam etype expression type, sa namespace::type - */ - template<typename OP, typename TA, int etype > - struct UnaryMapExp: public Exp< UnaryMapExp<OP,TA,etype>, etype >{ - /*! \brief source expression */ - const TA& src_; - /*! \brief constructor */ - UnaryMapExp( const TA &src ):src_(src){} - }; - - /*! \brief make expression */ - template<typename OP,typename TA, int ta> - inline UnaryMapExp<OP,TA,(ta|type::kMapper) > MakeExp( const Exp<TA,ta> &src ){ - return UnaryMapExp<OP,TA, (ta|type::kMapper) >( src.self() ); - } - - /*! - * \brief short hand for MakeExp, usage F<op>(src), create a unary operation expression - * \param src source expression - * \tparam operator - * \tparam TA source expression - * \tparam ta source expression type - * \sa mshadow::op - */ - template<typename OP,typename TA, int ta> - inline UnaryMapExp<OP,TA,(ta|type::kMapper) > F( const Exp<TA,ta> &src ){ - return MakeExp<OP>(src); - } - }; -}; -#endif
