SINGA-176 - Add loss and metric base classes Add loss and metric base classes, and implement the MSE as a subclass of Loss and the Accuracy as a subclass of Metric.
Add math functions to support the metric/loss classes. Draft test files for MSE and Accuracy. Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d6800791 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d6800791 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d6800791 Branch: refs/heads/master Commit: d680079165496da2787064d04daf283f5b3e7bba Parents: 72923b1 Author: wangwei <[email protected]> Authored: Sun May 22 23:12:30 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Thu May 26 14:09:53 2016 +0800 ---------------------------------------------------------------------- include/singa/core/tensor.h | 21 +- include/singa/model/loss.h | 61 +++++ include/singa/model/metric.h | 57 +++++ src/core/tensor/math_kernel.cu | 421 ++++++++++++++++++++++++++++++++ src/core/tensor/math_kernel.h | 82 +++++++ src/core/tensor/tensor.cc | 96 +++++++- src/core/tensor/tensor_math.h | 66 +++-- src/core/tensor/tensor_math_cpp.h | 54 ++++ src/core/tensor/tensor_math_cuda.h | 34 ++- src/model/loss/mse.h | 66 +++++ src/model/metric/accuracy.h | 82 +++++++ src/proto/layer.proto | 13 +- test/singa/test_accuracy.cc | 35 +++ test/singa/test_mse.cc | 88 +++++++ test/singa/test_tensor.cc | 8 +- test/singa/test_tensor_math.cc | 8 +- 16 files changed, 1146 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/include/singa/core/tensor.h ---------------------------------------------------------------------- diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h index 359f1ee..e560071 100644 --- a/include/singa/core/tensor.h +++ b/include/singa/core/tensor.h @@ -121,7 +121,7 @@ class Tensor { } /// Reset the tensor shape, it may reallocate blob, if MemSize() changes. 
- void ReShape(const Shape& shape); + void Reshape(const Shape& shape); /// Reset the shape, device, and data type as given tensor. /// If blob size changes, then reallocate a new blob. The previous blob would @@ -138,6 +138,10 @@ class Tensor { /// Equivalent to ToDevice(host_dev). void ToHost(); + /// Set each element of the tensor to be x + template<typename SType> + void SetValue(SType x); + /// For init the tensor values, copy 'num' elements. template<typename DType> void CopyDataFromHostPtr(const DType* src, size_t num); @@ -223,8 +227,23 @@ Tensor ReLU(const Tensor& t); Tensor Sigmoid(const Tensor& t); Tensor Sign(const Tensor& t); Tensor Sqrt(const Tensor& t); +Tensor Square(const Tensor& t); Tensor Tanh(const Tensor& t); + +template<typename SType> +SType Sum(const Tensor& t); +/// Sum elements in the Tensor, currently only support vector and matrix. +/// if 'axis' is 0, sum all rows into a single row +/// if 'axis' is 1, sum all columns into a single column +/// TODO(wangwei) support arbitrary Tensor like numpy.sum +Tensor Sum(const Tensor& t, int axis); + +/// Average elements in the Tensor, currently only support vector and matrix. +/// if 'axis' is 0, average all rows into a single row +/// if 'axis' is 1, average all columns into a single column +/// TODO(wangwei) support arbitrary Tensor like numpy.average +Tensor Average(const Tensor&t, int axis); /// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis] rows, /// and shape_[axis+1]*...*shape_[nDim()] columns. /// and do softmax along each row. 
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/include/singa/model/loss.h ---------------------------------------------------------------------- diff --git a/include/singa/model/loss.h b/include/singa/model/loss.h new file mode 100644 index 0000000..6c79e7b --- /dev/null +++ b/include/singa/model/loss.h @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SINGA_MODEL_LOSS_H_ +#define SINGA_MODEL_LOSS_H_ +#include "singa/proto/layer.pb.h" +#include "singa/core/tensor.h" +namespace singa { + +/// The base loss class, which declares the APIs for computing the objective +/// score (loss) for a pair of prediction (from the model) and the target (i.e. +/// the ground truth). It also computes the gradients of the objective w.r.t. +/// the prediction. It has similar APIs as Layer. +template <typename T = Tensor> +class Loss { + public: + Loss() = default; + void Setup(const string& conf) { + LossConf loss; + loss.ParseFromString(conf); + Setup(loss); + } + + /// Set meta fields from user configurations. + virtual void Setup(const LossConf& conf) {} + + /// Compute the loss values for each sample/instance given the prediction + /// and the target. 
+ virtual Tensor Forward(const Tensor& prediction, const T& target) = 0; + + /// Average loss values for all samples in the mini-batch + /// It calls Forward() internally. The calling pattern should be + /// [Evaluate|Forward] Backward. + float Evaluate(const Tensor& prediction, const T& target) { + const Tensor& loss = Forward(prediction, target); + return Sum<float>(loss) / (1.0f * loss.Size()); + } + + /// Compute the gradients of the loss values w.r.t. the prediction. + virtual Tensor Backward() = 0; +}; +} // namespace singa + +#endif // SINGA_MODEL_LOSS_H_ + + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/include/singa/model/metric.h ---------------------------------------------------------------------- diff --git a/include/singa/model/metric.h b/include/singa/model/metric.h new file mode 100644 index 0000000..6519028 --- /dev/null +++ b/include/singa/model/metric.h @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SINGA_MODEL_METRIC_H_ +#define SINGA_MODEL_METRIC_H_ +#include "singa/core/tensor.h" +#include "singa/proto/layer.pb.h" +namespace singa { + +/// The base metric class, which declares the APIs for computing the performance +/// evaluation metrics given the prediction of the model and the ground truth, +/// i.e., the target. +/// The target type is a template argument. For data samples with a single +/// label, T could be 1-d tenor (or vector<int>); If each data sample has +/// multiple labels, T could be vector<vector<int>>, one vector per sample. +template <typename T = Tensor> +class Metric { + public: + // TODO(wangwei) call Setup using a default MetricConf. + Metric() = default; + void Setup(const string& conf) { + MetricConf metric; + metric.ParseFromString(conf); + Setup(metric); + } + + /// Set meta fields from user configurations. + virtual void Setup(const MetricConf& conf) {} + + /// Compute the metric for each data sample + virtual Tensor Forward(const Tensor& prediction, const T& target) = 0; + + /// Comptue the metric value averaged over all samples (in a batch) + float Evaluate(const Tensor& prediction, const T& target) { + const Tensor& metric = Forward(prediction, target); + return Sum<float>(metric) / (1.0f * metric.Size()); + } +}; + +} // namespace singa + +#endif // SINGA_MODEL_METRIC_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/core/tensor/math_kernel.cu ---------------------------------------------------------------------- diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu new file mode 100644 index 0000000..585d65d --- /dev/null +++ b/src/core/tensor/math_kernel.cu @@ -0,0 +1,421 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifdef USE_CUDA +#include <cmath> +#include <algorithm> +#include "./math_kernel.h" + +#define CU2DBLOCK_X 32 +#define CU2DBLOCK_Y 32 + +#define CU1DBLOCK 1024 +#define CU1DBLOCKF 1024.0 + +// Cuda Kernel Functions +namespace cuda { +__global__ void kernel_softmax_loss(const float *prob, const int *label, + float *loss, int n, int dim) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + float prob_of_truth = prob[index * dim + label[index]]; + loss[index] -= log(max(prob_of_truth, FLT_MIN)); + } +} + +__global__ void kernel_softmax_gradient(float *grad, const int *label, int n, + int dim, float scale) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + int pos = index * dim + label[index]; + grad[pos] = (grad[pos] - 1.0f) * scale; + } +} + +__global__ void kernel_sum_vec(float *data, float *sum, int n) { + int THREADS = blockDim.x; + + __shared__ float aux[CU1DBLOCK]; + int steps = (n - 1) / THREADS + 1; + aux[threadIdx.x] = data[threadIdx.x]; + + for (int i = 1; i < steps; ++i) { + if (threadIdx.x + i * THREADS < n) { + aux[threadIdx.x] += data[threadIdx.x + i * THREADS]; + } + } + + int total_threads = THREADS; + __syncthreads(); + + while 
(total_threads > 1) { + int half_point = ((1 + total_threads) >> 1); + if (threadIdx.x < half_point) { + if (threadIdx.x + half_point < total_threads) { + aux[threadIdx.x] += aux[threadIdx.x + half_point]; + } + } + __syncthreads(); + total_threads = ((total_threads + 1) >> 1); + } + + __syncthreads(); + *sum = aux[0]; +} + +__global__ void kernel_sum_col(const float *src_mat_data, float *dst_vec_data, + int rows, int cols, int stride) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < rows; index += num_threads) { + dst_vec_data[index] = 0.0f; + for (int k = 0; k < cols; k++) { + dst_vec_data[index] += src_mat_data[index * stride + k]; + } + } +} + +__global__ void kernel_sum_row(const float *src_mat_data, float *dst_vec_data, + int rows, int cols, int stride) { + int j = blockIdx.x; + int THREADS = blockDim.x; + if (j >= cols) { + return; + } + + __shared__ float aux[CU1DBLOCK]; + int steps = (rows - 1) / THREADS + 1; + aux[threadIdx.x] = src_mat_data[j + threadIdx.x * stride]; + for (int i = 1; i < steps; ++i) { + if (threadIdx.x + i * THREADS < rows) { + aux[threadIdx.x] += + src_mat_data[j + (threadIdx.x + i * THREADS) * stride]; + } + } + + int total_threads = THREADS; + __syncthreads(); + while (total_threads > 1) { + int half_point = ((1 + total_threads) >> 1); + if (threadIdx.x < half_point) { + if (threadIdx.x + half_point < total_threads) { + aux[threadIdx.x] += aux[threadIdx.x + half_point]; + } + } + __syncthreads(); + total_threads = ((total_threads + 1) >> 1); + } + + __syncthreads(); + dst_vec_data[j] = aux[0]; +} + +__global__ void kernel_add_vec_row(const float *src_vec_data, + const float *src_mat_data, + float *des_mat_data, int rows, int cols, + int stride) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + int j = blockIdx.y * blockDim.y + threadIdx.y; + int num_threads_x = blockDim.x * gridDim.x; + int num_threads_y = blockDim.y * gridDim.y; + int index = 0; + for (; i < cols 
&& j < rows; i += num_threads_x, j += num_threads_y) { + index = j * stride + i; + des_mat_data[index] = src_mat_data[index] + src_vec_data[i]; + } +} + +__global__ void kernel_exp(const float *src_data, float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = exp(src_data[index]); + } +} + +__global__ void kernel_log(const float *src_data, float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = log(src_data[index]); + } +} + +__global__ void kernel_sigmoid(const float *src_data, float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = 1.0f / (1.0f + expf(-src_data[index])); + } +} + +__global__ void kernel_sigmoid_grad(const float *src_data, float *des_data, + int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = src_data[index] * (1.0f - src_data[index]); + } +} + +__global__ void kernel_relu(const float *src_data, float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = max(src_data[index], 0.0f); + } +} + +__global__ void kernel_relu_grad(const float *src_data, float *des_data, + int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = src_data[index] > 0.0f ? 
1.0f : 0.0f; + } +} + +__global__ void kernel_tanh(const float *src_data, float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = tanhf(src_data[index]); + } +} + +__global__ void kernel_tanh_grad(const float *src_data, float *des_data, + int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = (1.0f - src_data[index] * src_data[index]); + } +} + +__global__ void kernel_softplus(const float *src_data, float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = logf(1 + expf(src_data[index])); + } +} + +__global__ void kernel_softplus_grad(const float *src_data, float *des_data, + int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = 1.0f / (1.0f + expf(-src_data[index])); + } +} + +__global__ void kernel_square(const float *src_data, float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = src_data[index] * src_data[index]; + } +} + +__global__ void kernel_square_grad(const float *src_data, float *des_data, + int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = 2 * sqrt(src_data[index]); + } +} + +__global__ void kernel_sqrt(const float *src_data, float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = 
sqrt(src_data[index]); + } +} + +__global__ void kernel_pow(const float *src_data_a, const float *src_data_b, + float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = pow(src_data_a[index], src_data_b[index]); + } +} + +__global__ void kernel_mult(const float *src_data_a, const float *src_data_b, + float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = src_data_a[index] * src_data_b[index]; + } +} + +__global__ void kernel_div(const float *src_data_a, const float *src_data_b, + float *des_data, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = src_data_a[index] / src_data_b[index]; + } +} + +__global__ static void kernel_set_value(float *data, float value, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + data[index] = value; + } +} + +__global__ void kernel_threshold(const float *src_data, float *des_data, + float alpha, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int num_threads = blockDim.x * gridDim.x; + for (; index < n; index += num_threads) { + des_data[index] = src_data[index] < alpha ? 
1.0f : 0.0f; + } +} + +/* +void softmaxloss_forward(int n, int dim, const float *prob, + const int *label, float *loss) { + kernel_softmax_loss<<<ceil(n/CU1DBLOCKF), CU1DBLOCKF>>>(prob, label, loss, n, + dim); +} + +void softmaxloss_backward(int n, int dim, float scale, + const int *label, float *grad) { + kernel_softmax_gradient<<<ceil(n/CU1DBLOCKF), CU1DBLOCKF>>>(grad, label, n, + dim, scale); +} +*/ +void sum(int n, const float *in, float *out) { + int threads_per_block = n > CU1DBLOCK ? CU1DBLOCK : n; + // here, we only need one block + int num_blocks = 1; + + kernel_sum_vec<<<num_blocks, threads_per_block>>>(in, out, n); +} + +void sum_row(int rows, int cols, int stride, const float *in, float *out) { + int threads_per_block = rows > CU1DBLOCK ? CU1DBLOCK : rows; + int num_blocks = cols; + + kernel_sum_row<<<num_blocks, threads_per_block>>>(in, out, rows, cols, + stride); +} + +void sum_col(int rows, int cols, int stride, const float *in, float *out) { + int threads_per_block = cols > CU1DBLOCK ? CU1DBLOCK : cols; + int num_blocks = rows; + + kernel_sum_col<<<num_blocks, threads_per_block>>>(src_mat_data, dst_vec_data, + rows, cols, stride); +} +void add_row(int rows, int cols, int stride, const float *in_row, + const float *in_mat, float *out) { + dim3 threads_per_block(CU2DBLOCK_X, CU2DBLOCK_Y); + dim3 num_blocks( + cols / threads_per_block.x + (cols % threads_per_block.x == 0 ? 0 : 1), + rows / threads_per_block.y + (rows % threads_per_block.y == 0 ? 
0 : 1)); + kernel_add_vec_row<<<num_blocks, threads_per_block>>>(in_row, in_mat, out, + rows, cols, stride); +} + +void exp(int n, const float *in, float *out) { + kernel_exp<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void log(int n, const float *in, float *out) { + kernel_log<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void sigmoid(int n, const float *in, float *out) { + kernel_sigmoid<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void sigmoid_grad(int n, const float *in, float *out) { + kernel_sigmoid_grad<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void relu(int n, const float *in, float *out) { + kernel_relu<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void relu_grad(int n, const float *in, float *out) { + kernel_relu_grad<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void tanh(int n, const float *in, float *out) { + kernel_tanh<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void tanh_grad(int n, const float *in, float *out) { + kernel_tanh_grad<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void softplus(int n, const float *in, float *out) { + kernel_softplus<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void softplus_grad(int n, const float *in, float *out) { + kernel_softplus_grad<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void square(int n, const float *in, float *out) { + kernel_square<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void square_grad(int n, const float *in, float *out) { + kernel_square_grad<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void sqrt(int n, const float *in, float *out) { + kernel_sqrt<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, n); +} + +void pow(int n, const float *a, const float *b, float *out) { + kernel_pow<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(a, b, out, n); +} + +void mult(int n, const float *a, const float *b, float *out) { + kernel_mult<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(a, b, 
out, n); +} + +void div(int n, const float *a, const float *b, float *out) { + kernel_div<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(a, b, out, n); +} + +void set_value(int n, float v, float *out) { + kernel_set_value<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(out, v, n); +} + +void threshold(int n, float alpha, const float *in, float *out) { + kernel_threshold<<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>>(in, out, alpha, n); +} +} // namespace cuda +} // namespace singa + +#endif // USE_CUDA http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/core/tensor/math_kernel.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h new file mode 100644 index 0000000..7629ac8 --- /dev/null +++ b/src/core/tensor/math_kernel.h @@ -0,0 +1,82 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ +#ifndef SRC_CORE_TENSOR__MATH_KERNEL_H_ +#define SRC_CORE_TENSOR__MATH_KERNEL_H_ + +namespace singa { + +/* + void softmaxloss_forward(int n, int dim, const float *prob, + const int *label, float *loss); + + void softmaxloss_backward(int n, int dim, float scale, + const int *label, float *grad); +*/ +// TODO(wangwei) make all function templates. +namespace cuda { +void sum(int n, const float *in, float *out); + +void sum_row(int rows, int cols, int stride, const float *in, float *out); + +void sum_col(int rows, int cols, int stride, const float *in, float *out); + +void add_row(int rows, int cols, int stride, const float *in_row, + const float *in_mat, float *out); + +void exp(int n, const float *in, float *out); + +void log(int n, const float *in, float *out); + +void sigmoid(int n, const float *in, float *out); + +void sigmoid_grad(int n, const float *in, float *out); + +void relu(int n, const float *in, float *out); + +void relu_grad(int n, const float *in, float *out); + +void tanh(int n, const float *in, float *out); + +void tanh_grad(int n, const float *in, float *out); + +void softplus(int n, const float *in, float *out); + +void softplus_grad(int n, const float *in, float *out); + +void square(int n, const float *in, float *out); + +void square_grad(int n, const float *in, float *out); + +void sqrt(int n, const float *in, float *out); + +void pow(int n, const float *a, const float *b, float *out); + +void mult(int n, const float *a, const float *b, float *out); + +void div(int n, const float *a, const float *b, float *out); + +void set_value(int n, float v, float *out); + +void threshold(int n, float alpha, const float *in, float *out); +} // cuda +} // namespace singa + +#endif // SRC_CORE_TENSOR__MATH_KERNEL_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/core/tensor/tensor.cc ---------------------------------------------------------------------- diff --git 
a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc index 185b1f9..052f3ff 100644 --- a/src/core/tensor/tensor.cc +++ b/src/core/tensor/tensor.cc @@ -77,7 +77,7 @@ void Tensor::ResetLike(const Tensor& t) { } } -void Tensor::ReShape(const Shape& shape) { +void Tensor::Reshape(const Shape& shape) { if (shape_ != shape) { if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); blob_ = device_->NewBlob(Product(shape) * SizeOf(data_type_)); @@ -119,6 +119,7 @@ void Tensor::CopyDataFromHostPtr(const DType* src, size_t num) { } } template void Tensor::CopyDataFromHostPtr(const float* src, size_t num); +template void Tensor::CopyDataFromHostPtr(const int* src, size_t num); void Tensor::CopyData(const Tensor& src) { CHECK_EQ(Size(), src.Size()); @@ -279,6 +280,20 @@ void CopyDataToFrom(Tensor* dst, const Tensor& src, size_t num, } \ } while (0) + +template <typename SType> +void Tensor::SetValue(SType x) { + CHECK_EQ(sizeof(SType), SizeOf(data_type_)); + auto size = Size(); + auto ptr = blob_; + TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, { + device_->Exec( + [size, x, ptr](Context* ctx) { Set<DType, Lang>(size, x, ptr, ctx); }, + {}, {ptr}); + }); +} + + #define EltwiseUnaryTensorFn(fn, t, ret) \ do { \ TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \ @@ -305,8 +320,87 @@ GenUnaryTensorFunction(ReLU); GenUnaryTensorFunction(Sigmoid); GenUnaryTensorFunction(Sign); GenUnaryTensorFunction(Sqrt); +GenUnaryTensorFunction(Square); GenUnaryTensorFunction(Tanh); +// TODO(wangwei) consider matrix transpose. 
+Tensor SumRows(const Tensor& t) { + int ndim = t.shape().size(); + CHECK_EQ(ndim, 2) << "Cannot do SumRows for Tensor with ndim = " << ndim; + size_t nrow = t.shape().at(0), ncol = t.shape().at(1); + Tensor ret(Shape{ncol}, t.device(), t.data_type()); + TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { + ret.device()->Exec( + [nrow, ncol, t, ret](Context* ctx) { + SumRows<DType, Lang>(nrow, ncol, t.blob(), ret.blob(), ctx); + }, + {t.blob()}, {ret.blob()}); + }); + return ret; +} + +// TODO(wangwei) consider matrix transpose. +Tensor SumColumns(const Tensor& t) { + int ndim = t.shape().size(); + CHECK_EQ(ndim, 2) << "Cannot do SumColumns for Tensor with ndim = " << ndim; + CHECK(!t.transpose()); // TODO(wangwei) enable transpose + size_t nrow = t.shape().at(0), ncol = t.shape().at(1); + Tensor ret(Shape{nrow}, t.device(), t.data_type()); + TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { + ret.device()->Exec( + [nrow, ncol, t, ret](Context* ctx) { + SumColumns<DType, Lang>(nrow, ncol, t.blob(), ret.blob(), ctx); + }, + {t.blob()}, {ret.blob()}); + }); + return ret; +} + +// TODO(wangwei) conside async exec +template<> +float Sum<float>(const Tensor& t) { + float s = 0.0f; + TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { + t.device()->Exec( + [t, &s](Context* ctx) { + Sum<DType, Lang>(t.Size(), t.blob(), &s, ctx); + }, + {t.blob()}, {}); + }); + return s; +} + +Tensor Sum(const Tensor& t, int axis) { + if (axis == 0) { + return SumRows(t); + } else { + CHECK_EQ(axis, 1) << "Not support Sum over axis = " << axis; + return SumColumns(t); + } +} + +Tensor Average(const Tensor& t, int axis) { + // operator/ only has implementation for float scalar type, hence it is + // necessary to cast the denominator to a float. + // TODO(wangwei) implement function for cast scalar type involved in Tensor + // functions. 
E.g., + // template<S, D> + // D CastTo(S x) { + // return D(x); + // } + // for speical types, e.g., fp16: + // tempalte<> + // fp16 CastType(float x) { + // .... + // } + if (axis == 0) { + return Sum(t, 0) / (1.0f * t.shape().at(0)); + } else { + CHECK_EQ(axis, 1); + return Sum(t, 1) / (1.0f * t.shape().at(1)); + } +} + Tensor Softmax(const Tensor& t, int axis) { Tensor ret(t.shape(), t.device(), t.data_type()); Softmax(t, &ret, axis); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/core/tensor/tensor_math.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h index 53e979b..d55e15a 100644 --- a/src/core/tensor/tensor_math.h +++ b/src/core/tensor/tensor_math.h @@ -50,6 +50,10 @@ void Abs(int count, const Blob* input, Blob* ret, Context* ctx) { LOG(FATAL) << "Not Implemented"; } +template <typename DType, typename Lang> +void Set(int count, DType x, Blob* ret, Context* ctx) { + LOG(FATAL) << "Not Implemented"; +} /// sum all elements of input into ret template <typename DType, typename Lang> void Sum(int count, const Blob* input, DType* ret, Context* ctx) { @@ -80,6 +84,12 @@ void Sqrt(int count, const Blob* input, Blob* ret, Context* ctx) { LOG(FATAL) << "Not Implemented"; } +/// Element-wise operation, ret[i]=square([input[i]) +template <typename DType, typename Lang> +void Square(int count, const Blob* input, Blob* ret, Context* ctx) { + LOG(FATAL) << "Not Implemented"; +} + /// Element-wise operation, ret[i]=tanh([input[i]) template <typename DType, typename Lang> void Tanh(int count, const Blob* input, Blob* ret, Context* ctx) { @@ -102,6 +112,35 @@ void Softmax(int nrow, int ncol, const Blob* input, Blob* ret, Context* ctx) { LOG(FATAL) << "Not Implemented"; } +// TODO(wangwei) unify SumRow and SumCol. 
+/// Sum the rows of the input matrix into a vector +template <typename DType, typename Lang> +void SumRows(int nrow, int ncol, const Blob* input, Blob* ret, Context* ctx) { + LOG(FATAL) << "Not Implemented"; +} + +/// Sum the columns of the input matrix into a vector +template <typename DType, typename Lang> +void SumColumns(int nrow, int ncol, const Blob* input, Blob* ret, Context* ctx) { + LOG(FATAL) << "Not Implemented"; +} + +// TODO(wangwei) unify AddRow and AddCol. +/// Add the vector v to every row of A as the row of ret +template <typename DType, typename Lang> +void AddRow(int nrow, int ncol, const Blob* A, const Blob* v, Blob* ret, + Context* ctx) { + LOG(FATAL) << "Not Implemented"; +} + +/// Add the vector v to every column of A as the column of ret +template <typename DType, typename Lang> +void AddCol(int nrow, int ncol, const Blob* A, const Blob* v, Blob* ret, + Context* ctx) { + LOG(FATAL) << "Not Implemented"; +} + + /// Element-wise operation, do v^x for every v from the input tensor template <typename DType, typename Lang> void Pow(int count, const Blob* input, DType x, Blob* ret, Context* ctx) { @@ -177,33 +216,6 @@ void Outer(int m, int n, const Blob* lhs, const Blob* rhs, Blob* ret, LOG(FATAL) << "Not Implemented"; } -// TODO(wangwei) unify SumRow and SumCol. -/// Sum the rows of the input matrix into a vector -template <typename DType, typename Lang> -void SumRow(int nrow, int ncol, const Blob* input, Blob* ret, Context* ctx) { - LOG(FATAL) << "Not Implemented"; -} -/// Sum the rows of the input matrix into a vector -template <typename DType, typename Lang> -void SumCol(int nrow, int ncol, const Blob* input, Blob* ret, Context* ctx) { - LOG(FATAL) << "Not Implemented"; -} - -// TODO(wangwei) unify AddRow and AddCol. 
-/// Add the vector v to every row of A as the row of ret -template <typename DType, typename Lang> -void AddRow(int nrow, int ncol, const Blob* A, const Blob* v, Blob* ret, - Context* ctx) { - LOG(FATAL) << "Not Implemented"; -} - -/// Add the vector v to every column of A as the column of ret -template <typename DType, typename Lang> -void AddCol(int nrow, int ncol, const Blob* A, const Blob* v, Blob* ret, - Context* ctx) { - LOG(FATAL) << "Not Implemented"; -} - // ===== BLAS functions, ref to http://docs.nvidia.com/cuda/cublas // ===== Level 1 /// return the index of the element with the max value. http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/core/tensor/tensor_math_cpp.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h index b58e3bd..c584b69 100644 --- a/src/core/tensor/tensor_math_cpp.h +++ b/src/core/tensor/tensor_math_cpp.h @@ -26,6 +26,16 @@ namespace singa { template <> +void Square<float, lang::Cpp>(int count, const Blob* input, + Blob* ret, Context* ctx) { + float* dptr = static_cast<float*>(ret->mutable_data()); + const float* in = static_cast<const float*>(input->data()); + for (int i = 0; i < count; i++) { + dptr[i] = in[i] * in[i]; + } +} + +template <> void Add<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs, Blob* ret, Context* ctx) { // CHECK_EQ(ctx->stream, nullptr); @@ -36,6 +46,50 @@ void Add<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs, dptr[i] = lptr[i] + rptr[i]; } } + + +// sum all elements of input into ret +// TODO(wangwei) optimize using omp +template <> +void Sum<float, lang::Cpp>(int count, const Blob* input, float* ret, + Context* ctx) { + float s = 0.f; + const float* in = static_cast<const float*>(input->data()); + for (int i = 0; i < count; i++) { + s += in[i]; + } + *ret = s; +} + +// TODO(wangwei) optimize using omp +template <> +void SumRows<float, lang::Cpp>(int 
nrow, int ncol, const Blob* input, Blob* ret, + Context* ctx) { + float* dptr = static_cast<float*>(ret->mutable_data()); + const float* in = static_cast<const float*>(input->data()); + memset(dptr, 0, ncol * sizeof(float)); + for (int r = 0; r < nrow; r++) { + for (int c = 0; c < ncol; c++) { + dptr[c] += in[r * ncol + c]; + } + } +} + +// Sum the rows of the input matrix into a vector +// TODO(wangwei) optimize using omp +template <> +void SumColumns<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* ret, + Context* ctx) { + float* dptr = static_cast<float*>(ret->mutable_data()); + const float* in = static_cast<const float*>(input->data()); + memset(dptr, 0, ncol * sizeof(float)); + for (int r = 0; r < nrow; r++) { + for (int c = 0; c < ncol; c++) { + dptr[r] += in[r * ncol + c]; + } + } +} + template <> void EltwiseMult<float, lang::Cpp>(int count, const Blob* input, float x, Blob* ret, Context* ctx) { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/core/tensor/tensor_math_cuda.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h index 40f9210..2e497d2 100644 --- a/src/core/tensor/tensor_math_cuda.h +++ b/src/core/tensor/tensor_math_cuda.h @@ -18,14 +18,14 @@ #ifndef SINGA_CORE_TENSOR_TENSOR_MATH_CUDA_H_ #define SINGA_CORE_TENSOR_TENSOR_MATH_CUDA_H_ -#include "./tensor_math.h" #include "singa_config.h" +#ifdef USE_CUDA +#include "./tensor_math.h" +#include "./math_kernel.h" #include "singa/core/common.h" - namespace singa { -#ifdef USE_CUDA template<> void Add<float, lang::Cuda>(int count, const Blob* lhs, const Blob* rhs, Blob* ret, Context* ctx) { @@ -38,9 +38,35 @@ void Add<float, lang::Cuda>(int count, const Blob* lhs, const Blob* rhs, cublasSaxpy(ctx->cublas_handle, 1.0f, rptr, 1, ptr, 1); */ } +// sum all elements of input into ret +// TODO(wangwei) optimize using stream +template <> +void Sum<float, lang::Cuda>(int 
count, const Blob* input, float* ret, + Context* ctx) { + const float* in = static_cast<const float*>(input->data()); + cuda::sum(count, in, ret); +} + +// TODO(wangwei) optimize using stream +template <> +void SumRows<float, lang::Cuda>(int nrow, int ncol, const Blob* input, + Blob* ret, Context* ctx) { + float* dptr = static_cast<float*>(ret->mutable_data()); + const float* in = static_cast<const float*>(input->data()); + cuda::sum_row(nrow, ncol, ncol, in, dptr); +} -#endif +// Sum the rows of the input matrix into a vector +// TODO(wangwei) optimize using stream +template <> +void SumColumns<float, lang::Cuda>(int nrow, int ncol, const Blob* input, + Blob* ret, Context* ctx) { + float* dptr = static_cast<float*>(ret->mutable_data()); + const float* in = static_cast<const float*>(input->data()); + cuda::sum_col(nrow, ncol, ncol, in, dptr); +} } +#endif // USE_CUDA #endif // SINGA_CORE_TENSOR_TENSOR_MATH_CUDA_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/model/loss/mse.h ---------------------------------------------------------------------- diff --git a/src/model/loss/mse.h b/src/model/loss/mse.h new file mode 100644 index 0000000..5799f13 --- /dev/null +++ b/src/model/loss/mse.h @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SINGA_MODEL_LOSS_MSE_H_ +#define SINGA_MODEL_LOSS_MSE_H_ +#include <stack> +#include "singa/model/loss.h" + +namespace singa { + +/// MSE is for mean squared error or squared euclidean distance. +class MSE : public Loss<Tensor> { + public: + /// Compute the loss values for each sample/instance given the prediction + /// and the target, which is 0.5/||prediction-target||^2 + /// Users can call Average(const Tensor&) to get the average + /// loss value over all samples in the batch. + Tensor Forward(const Tensor& prediction, const Tensor& target) override; + + /// Compute the gradients of the loss values w.r.t. the prediction, + /// which is (prediction-target)/batchsize + Tensor Backward() override; + + private: + // to buffer intermediate data, i.e., prediction-target + std::stack<Tensor> buf_; +}; + +Tensor MSE::Forward(const Tensor& prediction, const Tensor& target) { + CHECK(buf_.empty()) << "Do not call Forward successively for more than twice." 
+ << " The calling pattern is [Forward|Evaluate] Backward"; + Tensor t = prediction - target; + size_t batchsize = 1; + if (t.nDim() > 1) batchsize = t.shape().at(0); + size_t dim = t.Size() / batchsize; + t.Reshape(Shape{batchsize, dim}); + buf_.push(t); + // TODO(wangwei) use CastType for operator/ + return Sum(Square(t), 1); +} + +Tensor MSE::Backward() { + const Tensor& ret = buf_.top(); + buf_.pop(); + return ret / (1.0f * ret.shape().at(0)); +} +} // namespace singa + +#endif // SINGA_MODEL_LOSS_H_ + + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/model/metric/accuracy.h ---------------------------------------------------------------------- diff --git a/src/model/metric/accuracy.h b/src/model/metric/accuracy.h new file mode 100644 index 0000000..05c1643 --- /dev/null +++ b/src/model/metric/accuracy.h @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SINGA_MODEL_METRIC_ACCURACY_H_ +#define SINGA_MODEL_METRIC_ACCURACY_H_ +#include "singa/model/metric.h" +namespace singa { + +/// Compute the accuray of the prediction, which is matched against the +/// ground truth labels. +/// TODO(wangwei) consider multi-label cases. 
+class Accuracy : public Metric<Tensor> { + public: + /// Set meta fields from user configurations. + void Setup(const MetricConf& conf) override { top_k_ = conf.top_k(); } + + /// Check the prediction against the target (ground truth) for each data + /// sample. The returned Tensor has a float value for each sample, 0 for wrong + /// and 1 for correct. Users can call Sum(const Tensor&) / Tensor::Size() to + /// get the accuracy. + Tensor Forward(const Tensor& prediction, const Tensor& target); + + private: + /// \copydoc Match(const Tensor&, const Tensor&); + Tensor Match(const Tensor& prediction, const vector<int>& target); + /// If the ground truth label is in the top k predicted labels, then the + /// prediction is correct. + size_t top_k_ = 1; +}; + +Tensor Accuracy::Match(const Tensor& prediction, const vector<int>& target) { + size_t batchsize = target.size(); + size_t nb_classes = prediction.Size() / batchsize; + // each row of prediction is the prob distribution for one sample + CHECK_EQ(prediction.shape().at(0), batchsize); + const float* prob = prediction.data<const float*>(); + float* score = new float[batchsize]; + for (size_t b = 0; b < batchsize; b++) { + vector<std::pair<float, int>> prob_class; + for (size_t c = 0; c < nb_classes; c++) { + prob_class.push_back(std::make_pair(prob[b * nb_classes + c], c)); + } + std::partial_sort(prob_class.begin(), prob_class.begin() + top_k_, + prob_class.end(), std::greater<std::pair<float, int>>()); + + for (size_t k = 0; k < top_k_; k++) + if (prob_class.at(k).second == target.at(b)) score[b] = 1; + } + Tensor ret(Shape{batchsize}); + ret.CopyDataFromHostPtr(score, batchsize); + return ret; +} + +// TODO(wangwei) consider multi-label cases, where target is of shape +// nb_samples * nb_classes +Tensor Accuracy::Forward(const Tensor& prediction, const Tensor& target) { + vector<int> target_vec; + // TODO(wangwei) copy target to host. 
+ const int* target_value = target.data<const int*>(); + for (size_t i = 0; i < target.Size(); i++) + target_vec.push_back(target_value[i]); + return Match(prediction, target_vec); +} + +} // namespace singa + +#endif // SINGA_MODEL_METRIC_ACCURACY_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/src/proto/layer.proto ---------------------------------------------------------------------- diff --git a/src/proto/layer.proto b/src/proto/layer.proto index 3d130ea..51225ee 100644 --- a/src/proto/layer.proto +++ b/src/proto/layer.proto @@ -157,7 +157,7 @@ message LayerConf { // for their implementation. These layers include an Engine type and // engine parameter for selecting the implementation. // The default for the engine is set by the ENGINE switch at compile-time. - optional AccuracyConf accuracy_conf = 102; + //optional AccuracyConf accuracy_conf = 102; optional ArgMaxConf argmax_conf = 103; optional ConcatConf concat_conf = 104; optional ContrastiveLossConf contrastive_loss_conf = 105; @@ -177,6 +177,8 @@ message LayerConf { optional InnerProductConf inner_product_conf = 117; optional LogConf log_conf = 134; optional LRNConf lrn_conf = 118; + // Used in SINGA + optional MetricConf metric_conf = 200; // optional MemoryDataConf memory_data_conf = 119; optional MVNConf mvn_conf = 120; optional PoolingConf pooling_conf = 121; @@ -230,10 +232,7 @@ message LossConf { optional bool normalize = 2 [default = true]; } -// Messages that store hyper-parameters used by individual layer types follow, in -// alphabetical order. - -message AccuracyConf { +message MetricConf { // When computing accuracy, count as correct by comparing the true label to // the top k scoring classes. By default, only compare to the top scoring // class (i.e. argmax). @@ -249,6 +248,10 @@ message AccuracyConf { // If specified, ignore instances with the given label. 
optional int32 ignore_label = 3; } +// Messages that store hyper-parameters used by individual layer types follow, in +// alphabetical order. + + message ArgMaxConf { // If true produce pairs (argmax, maxval) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/test/singa/test_accuracy.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_accuracy.cc b/test/singa/test_accuracy.cc new file mode 100644 index 0000000..dc7719b --- /dev/null +++ b/test/singa/test_accuracy.cc @@ -0,0 +1,35 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + +#include "gtest/gtest.h" +#include "../src/model/metric/accuracy.h" + +TEST(Accuracy, Compute) { + singa::Accuracy acc; + singa::Tensor p(singa::Shape{2, 3}); + singa::Tensor t(singa::Shape{2}, singa::kInt); + const float pdat[6] = {0.1, 0.3, 0.6, 0.3, 0.2, 0.5}; + const int tdat[2] = {1, 2}; // one wrong, one correct + p.CopyDataFromHostPtr(pdat, sizeof(pdat) / sizeof(float)); + t.CopyDataFromHostPtr(tdat, sizeof(pdat) / sizeof(float)); + float a = acc.Evaluate(p, t); + EXPECT_FLOAT_EQ(a, 0.5f); +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/test/singa/test_mse.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc new file mode 100644 index 0000000..9056176 --- /dev/null +++ b/test/singa/test_mse.cc @@ -0,0 +1,88 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + +#include "gtest/gtest.h" +#include "singa/core/tensor.h" +#include "singa/core/device.h" +#include "../src/model/loss/mse.h" + +using singa::Tensor; +class TestMSE : public ::testing::Test { + protected: + virtual void SetUp() { + p.Reshape(singa::Shape{2, 3}); + t.Reshape(singa::Shape{2, 3}); + p.CopyDataFromHostPtr(pdat, sizeof(pdat) / sizeof(float)); + t.CopyDataFromHostPtr(tdat, sizeof(pdat) / sizeof(float)); + } + const float pdat[6] = {0.1, 1.1, 2.1, 0.3, 2.2, 1.8}; + const float tdat[6] = {0.1, 1.1, 2.0, 0.3, 2.2, 1.8}; + + singa::Tensor p, t; +}; + +TEST_F(TestMSE, CppForward) { + singa::MSE mse; + const Tensor& loss = mse.Forward(p, t); + auto ldat = loss.data<const float*>(); + + EXPECT_FLOAT_EQ(ldat[0], 0.005); + EXPECT_FLOAT_EQ(ldat[1], 0); +} + +TEST_F(TestMSE, CudaForward) { + singa::MSE mse; + singa::CudaGPU dev; + p.ToDevice(&dev); + t.ToDevice(&dev); + Tensor loss = mse.Forward(p, t); + + loss.ToHost(); + auto ldat = loss.data<const float*>(); + + for (size_t i = 0; i < loss.Size(); i++) + EXPECT_FLOAT_EQ(ldat[i], 0.5 * (pdat[i] - tdat[i]) * (pdat[i] - tdat[i])); +} + +TEST_F(TestMSE, CppBackward) { + singa::MSE mse; + mse.Forward(p, t); + const Tensor& grad = mse.Backward(); + + auto gdat = grad.data<const float*>(); + + for (size_t i = 0; i < grad.Size(); i++) + EXPECT_FLOAT_EQ(gdat[i], pdat[i] - tdat[i]); +} + +TEST_F(TestMSE, CudaBackward) { + singa::MSE mse; + singa::CudaGPU dev; + p.ToDevice(&dev); + t.ToDevice(&dev); + mse.Forward(p, t); + Tensor grad = mse.Backward(); + grad.ToHost(); + auto gdat = grad.data<const float*>(); + + for (size_t i = 0; i < grad.Size(); i++) + EXPECT_FLOAT_EQ(gdat[i], pdat[i] - tdat[i]); +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/test/singa/test_tensor.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc index b3f0c6b..f9acdb0 
100644 --- a/test/singa/test_tensor.cc +++ b/test/singa/test_tensor.cc @@ -35,18 +35,18 @@ TEST(TensorTest, TestConstructor) { TEST(TensorClass, Reshape) { Tensor t; - t.ReShape(Shape{2,3}); + t.Reshape(Shape{2,3}); EXPECT_TRUE((Shape{2,3} == t.shape())); - t.ReShape(Shape{3,3, 4}); + t.Reshape(Shape{3,3, 4}); EXPECT_TRUE((Shape{3,3, 4} == t.shape())); - t.ReShape(Shape{12}); + t.Reshape(Shape{12}); EXPECT_TRUE((Shape{12} == t.shape())); Tensor o; EXPECT_TRUE(o.shape() != t.shape()); - o.ReShape(Shape{3, 3}); + o.Reshape(Shape{3, 3}); EXPECT_TRUE(o.shape() != t.shape()); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d6800791/test/singa/test_tensor_math.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc index eee18ec..fb7e3e8 100644 --- a/test/singa/test_tensor_math.cc +++ b/test/singa/test_tensor_math.cc @@ -9,10 +9,10 @@ class TestTensorMath : public ::testing::Test { virtual void SetUp() { const float dat1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; const float dat2[] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f}; - a.ReShape(singa::Shape{6}); - b.ReShape(singa::Shape{6}); - c.ReShape(singa::Shape{6, 1}); - d.ReShape(singa::Shape{3, 2}); + a.Reshape(singa::Shape{6}); + b.Reshape(singa::Shape{6}); + c.Reshape(singa::Shape{6, 1}); + d.Reshape(singa::Shape{3, 2}); a.CopyDataFromHostPtr<float>(dat1, 6); b.CopyDataFromHostPtr<float>(dat2, 6);
