http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/cluster_rt.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/cluster_rt.h b/include/singa/utils/cluster_rt.h deleted file mode 100644 index 4ab48bd..0000000 --- a/include/singa/utils/cluster_rt.h +++ /dev/null @@ -1,105 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_CLUSTER_RT_H_ -#define SINGA_UTILS_CLUSTER_RT_H_ - -#include <map> -#include <mutex> -#include <string> -#include <vector> - -namespace singa { - -typedef void (*rt_callback)(void *contest); - -struct RTCallback { - rt_callback fn; - void* ctx; -}; - -/** - * ClusterRuntime is a runtime service that manages dynamic configuration - * and status of the whole cluster. It mainly provides following services: - * 1) Provide running status of each server/worker - * 2) Translate process id to (hostname:port) - */ -class ClusterRuntime { - public: - // ClusterRuntime have different implementation determined when compiling - static ClusterRuntime* Create(const std::string&host, int job_id); - - virtual ~ClusterRuntime() {} - /** - * Initialize the runtime instance - */ - virtual bool Init() = 0; - /** - * register the process, and get a unique process id - * - * \return the process id, -1 if failed - */ - virtual int RegistProc(const std::string& host_addr, int pid) = 0; - /** - * translate the process id to host address - * - * \return the host and port, "" if no such proc id - */ - virtual std::string GetProcHost(int proc_id) = 0; - /** - * Server: watch all workers in a server group, - * will be notified when all workers have left - */ - virtual bool WatchSGroup(int gid, int sid, rt_callback fn, void* ctx) = 0; - /** - * Worker: join a server group (i.e. start to read/update these servers) - */ - virtual bool JoinSGroup(int gid, int wid, int s_group) = 0; - /** - * Worker: leave a server group (i.e. 
finish all its work) - */ - virtual bool LeaveSGroup(int gid, int wid, int s_group) = 0; -}; - -/* - * A ClusterRuntime implementation for single-process environment - */ -class SPClusterRT : public ClusterRuntime { - public: - ~SPClusterRT(); - - bool Init() override; - int RegistProc(const std::string& host_addr, int pid) override; - std::string GetProcHost(int proc_id) override; - bool WatchSGroup(int gid, int sid, rt_callback fn, void* ctx) override; - bool JoinSGroup(int gid, int wid, int s_group) override; - bool LeaveSGroup(int gid, int wid, int s_group) override; - - private: - std::vector<std::string> proc_list_; - std::map<int, std::vector<RTCallback*>> grp_callbacks_; - std::map<int, int> grp_count_; - std::mutex lock_; -}; - -} // namespace singa - -#endif // SINGA_UTILS_CLUSTER_RT_H_
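For orientation, the ClusterRuntime interface deleted above was a small registration/lookup service: workers and servers register themselves, translate process ids to host:port strings, and join or leave server groups. A minimal usage sketch follows; it is illustrative only (the zookeeper host string, the hard-coded worker address, the function name and the glog CHECK macros are assumptions, not code from this commit):

// Illustrative sketch of how the removed ClusterRuntime API was driven by a worker.
#include <unistd.h>
#include <glog/logging.h>
#include "singa/utils/cluster_rt.h"

void WorkerJoin(const std::string& zk_host, int job_id, int gid, int wid) {
  // Create() selects the concrete implementation (e.g. SPClusterRT) at compile time.
  singa::ClusterRuntime* rt = singa::ClusterRuntime::Create(zk_host, job_id);
  CHECK(rt->Init()) << "runtime init failed";
  // Register this process and obtain a unique process id (-1 on failure).
  int proc_id = rt->RegistProc("127.0.0.1:5000", getpid());
  CHECK_GE(proc_id, 0);
  // Join server group 0 before reading/updating its parameters ...
  CHECK(rt->JoinSGroup(gid, wid, 0));
  // ... and leave it once all work is done, so that servers watching the group
  // via WatchSGroup() are notified when the last worker has left.
  CHECK(rt->LeaveSGroup(gid, wid, 0));
  delete rt;
}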
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/common.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/common.h b/include/singa/utils/common.h deleted file mode 100644 index 0bcec58..0000000 --- a/include/singa/utils/common.h +++ /dev/null @@ -1,165 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_COMMON_H_ -#define SINGA_UTILS_COMMON_H_ - -#include <google/protobuf/message.h> -#include <unordered_map> -#include <sstream> -#include <string> -#include <vector> -#include <utility> -#include "singa/proto/common.pb.h" - -namespace singa { - -using std::vector; -using std::string; -std::string IntVecToString(const std::vector<int>& vec); -std::string VStringPrintf(std::string fmt, va_list l); -std::string StringPrintf(std::string fmt, ...); - -/** - * Locate the position of the arg in arglist. - * - * @param argc total num of arguments - * @param arglist all arguments - * @param the searched argument - * @return the position of arg in the arglist; -1 if not found. - */ -int ArgPos(int argc, char** arglist, const char* arg); -void CreateFolder(const std::string name); -/** - * Slice a set of large Params into small pieces such that they can be roughtly - * equally partitioned into a fixed number of boxes. - * - * @param num total number of boxes to store the small pieces - * @param sizes size of all Params - * @return all slices for each Param - */ -const std::vector<std::vector<int>> Slice(int num, - const std::vector<int>& sizes); -/** - * Partition slices into boxes. - * - * @param num number of boxes - * @param slices slice sizes - * @return box id for each slice - */ -const std::vector<int> PartitionSlices(int num, const std::vector<int>& slices); -/* -inline void Sleep(int millisec=1){ - std::this_thread::sleep_for(std::chrono::milliseconds(millisec)); -} -*/ -int gcd(int a, int b); -int LeastCommonMultiple(int a, int b); -/* -inline float rand_real() { - return static_cast<float>(rand_r())/(RAND_MAX+1.0f); -} -*/ -std::string GetHostIP(); -void SetupLog(const std::string& workspace, const std::string& model); - -/** - * Performance mtrics. - */ -class Metric { - public: - Metric() {} - explicit Metric(const std::string& str); - /** - * Add one metric. - * - * If the metric exist, the aggregate. Otherwise create a new entry for it. 
- * - * @param name metric name, e.g., 'loss' - * @param value metric value - */ - void Add(const std::string& name, float value); - void Add(const std::string& name, float value, int count); - /** - * reset all metric counter and value to 0 - */ - void Reset(); - /** - * Generate a one-line string for logging - */ - std::string ToLogString() const; - /** - * Serialize the object into a string - */ - std::string ToString() const; - /** - * Parse the metric from a string - */ - void ParseFrom(const std::string& msg); - - private: - std::unordered_map<std::string, std::pair<int, float>> entry_; -}; - -using google::protobuf::Message; -void Im2col(const float* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - float* data_col); -void Col2im(const float* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - float* data_im); -void ForwardMaxPooling(const float* bottom, const int num, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - float* top, float* mask); -void BackwardMaxPooling(const float* top, const float* mask, const int num, - const int channels, const int height, const int width, - const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - float* bottom); -void ForwardAvgPooling(const float* bottom, const int num, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - float* top); -void BackwardAvgPooling(const float* top, const int num, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - float* bottom); - -void ReadProtoFromTextFile(const char* filename, Message* proto); -void WriteProtoToTextFile(const Message& proto, const char* filename); -void ReadProtoFromBinaryFile(const char* filename, Message* proto); -void WriteProtoToBinaryFile(const Message& proto, const char* filename); - -/** - * Write a string (e.g., graph reprensetation of a net) into a text file. - */ -void WriteStringToTextFile(const string& filename, const string& context); - -/** - * Parse metric pairs (key = value[, key = value]) from string - */ -const vector<std::pair<string, float>> GetMetricFromString(const string& disp); -} // namespace singa - -#endif // SINGA_UTILS_COMMON_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/context.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h index 3490d29..55e783d 100644 --- a/include/singa/utils/context.h +++ b/include/singa/utils/context.h @@ -30,7 +30,17 @@ #include <vector> #ifdef USE_GPU -#include "singa/utils/cuda_utils.h" +#include <cublas_v2.h> +#include <cuda.h> +#include <cuda_runtime.h> +#include <curand.h> +// CUDA: various checks for different function calls. 
+#define CUDA_CHECK(condition) \ +/* Code block avoids redefinition of cudaError_t error */ \ +do { \ +cudaError_t error = condition; \ +CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ +} while (0) #ifdef USE_CUDNN #include <cudnn.h> http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/cuda_utils.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/cuda_utils.h b/include/singa/utils/cuda_utils.h deleted file mode 100644 index 1270e92..0000000 --- a/include/singa/utils/cuda_utils.h +++ /dev/null @@ -1,48 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -/** - * The code is adapted from that of Caffe which is under BSD 2 Clause License. - * - * COPYRIGHT - * All contributions by the University of California: - * Copyright (c) 2014, The Regents of the University of California (Regents) - * All rights reserved. - * All other contributions: - * Copyright (c) 2014, the respective contributors - * All rights reserved. - */ -#ifndef SINGA_UTILS_CUDA_UTILS_H_ -#define SINGA_UTILS_CUDA_UTILS_H_ -#include <cublas_v2.h> -#include <cuda.h> -#include <cuda_runtime.h> -#include <curand.h> - -// CUDA: various checks for different function calls. -#define CUDA_CHECK(condition) \ - /* Code block avoids redefinition of cudaError_t error */ \ - do { \ - cudaError_t error = condition; \ - CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ - } while (0) - -#endif // SINGA_UTILS_CUDA_UTILS_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/graph.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/graph.h b/include/singa/utils/graph.h deleted file mode 100644 index 2462808..0000000 --- a/include/singa/utils/graph.h +++ /dev/null @@ -1,196 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. 
See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_GRAPH_H_ -#define SINGA_UTILS_GRAPH_H_ - -#include <stack> -#include <string> -#include <map> -#include <vector> -namespace singa { -using std::string; -using std::map; - -/** - * Node class representing a layer in a neural net. - * - * TODO remove layer dependent fields, like origin, and partition_id, to make - * it an independent and simple class. - */ -class Node { - public: - /** - * Node constructor. - * - * @param name identifier of the node, e.g, layer name. - */ - explicit Node(string name); - /** - * Construct a node with specified attributes. - * @param name node identifier - * @param attrs node attributes for printing, including "shape", "color", etc. - * Depending on the visulization engine, if using graphviz, then the attribute - * list is http://www.graphviz.org/content/attrs. - */ - Node(string name, const std::map<string, string>& attrs); - /** - * @deprecated {to make the Graph class an independent class.} - * - * Node constructor used for model partitioning. - * - * This node is a partition of some node. - * @param name node name - * @param origin name of the original node - * @param id partition id of this node - * @param proto conf of the corresponding layer - */ - Node(const string& name, const std::string& origin, int id, void* proto); - ~Node() {} // the proto field is deleted outside by other functions - - - void AddDstNode(Node* dst); - void AddSrcNode(Node* src); - void RemoveDstNode(Node* dst); - void RemoveSrcNode(Node* src); - - string name = ""; - //! name of the origin node/layer from which is node is derived - string origin = ""; - //! partition id - int partition_id = 0; - //! proto of the corresponding layer - void* proto = nullptr; - std::vector<Node*> srcnodes; - std::vector<Node*> dstnodes; - //!< node attribute including shape, color, etc. - std::map<string, string> attrs; -}; - -/** - * Neuralnet is constructed by creating a graph with each node representing one - * layer at first. After topology sort for graph nodes, layers are created and - * connected. - */ -class Graph { - public: - Graph() {} - ~Graph(); - const Graph Reverse() const; - /** - * @return all nodes of the graph - */ - inline const std::vector<Node*>& nodes() const { - return nodes_; - } - /** - * @param name node name - * @return return the node of given name - */ - inline Node* node(const string& name) const { - return name2node_.at(name); - } - /** - * Add an exiting node into this graph. - */ - void AddNode(Node* node); - /** - * Creat an node with the given name and add it into the graph. - * @return the newly created node. - */ - Node* AddNode(const string& name); - /** - * Create an node with the given name and attributes. - */ - Node* AddNode(const string& name, const std::map<string, string>& attrs); - /** - * @deprecated {remove layer related info from node attrs} - * Add a node with given name and other info. - */ - Node* AddNode(const std::string& name, const std::string& origin, int id, - void* proto); - /** - * Add an edge connecting the two given nodes. - */ - void AddEdge(Node* srcnode, Node* dstnode); - /** - * Add an edge connecting the two nodes with the given name. - */ - void AddEdge(const string& src, const std::string& dst); - /** - * Add an edge connecting the two given nodes, the edge attributes are also - * given. 
- */ - void AddEdge(Node* srcnode, Node* dstnode, - const std::map<string, string>& attrs); - /** - * Add an edge connecting the two nodes with the given names, the edge - * attributes are also given, which are used for printing. - * http://www.graphviz.org/content/attrs - */ - void AddEdge(const string& src, const std::string& dst, - const std::map<string, string>& attrs); - - /** - * Remove the edge connecting the two given nodes. - */ - void RemoveEdge(Node* src, Node* dst); - /** - * Remove the edge connecting two nodes with the given names. - */ - void RemoveEdge(const string &src, const std::string& dst); - /** - * Dump the graph into json string which can be used to draw a picture by - * graphviz. - * - * It calls ToJson(const std::map<std::string, std::string>& label) with - * empty label mapping. - */ - string ToJson() const; - /** - * \copybreif ToJson() - * - * @param label information to be displayed as label for each node - */ - string ToJson(const map<std::string, std::string>& label) const; - /** - * Do topology sort for all nodes of the graph. - */ - void Sort(); - - private: - /** - * - * @return the name of the edge connecting src to dst - */ - const string GetEdgeName(const string& src, const string& dst) const { - return src + "-->" + dst; - } - - private: - std::vector<Node*> nodes_; - std::map<string, Node*> name2node_; - std::map<string, std::map<string, string>> edge_attrs_; -}; - -} // namespace singa - -#endif // SINGA_UTILS_GRAPH_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/image_transform.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/image_transform.h b/include/singa/utils/image_transform.h deleted file mode 100644 index 2867ad2..0000000 --- a/include/singa/utils/image_transform.h +++ /dev/null @@ -1,35 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_IMAGE_TRANSFORM_H_ -#define SINGA_UTILS_IMAGE_TRANSFORM_H_ - -#include <glog/logging.h> -// TODO(wangwei) provide image transformation API, the implementation can be -// done by opencv, manual transform, or mshadow. 
-namespace singa { - -void ImageTransform(const float* in, const float* mean, bool mirror, int h_crop, - int w_crop, int h_offset, int w_offset, int channel, int height, int width, - float scale, float* out); -} // namespace singa - -#endif // SINGA_UTILS_IMAGE_TRANSFORM_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/job_manager.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/job_manager.h b/include/singa/utils/job_manager.h deleted file mode 100644 index 7f1b4f1..0000000 --- a/include/singa/utils/job_manager.h +++ /dev/null @@ -1,79 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_JOB_MANAGER_H_ -#define SINGA_UTILS_JOB_MANAGER_H_ - -#include <string> -#include <vector> - -#ifdef USE_ZOOKEEPER -#include "singa/utils/zk_service.h" -#endif - -namespace singa { - -struct JobInfo { - int id; - int procs; - std::string name; -}; - -class JobManager { - public: - // host is comma separated host:port pairs, each corresponding to a zk server. - // e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" - explicit JobManager(const std::string& host); - - // NOTICE: Init must be called once, before start to use other functions - bool Init(); - // generate a unique job id - bool GenerateJobID(int* id); - // generate a list of hosts for a job conf - bool GenerateHostList(const char* host_file, const char* job_file, - std::vector<std::string>* list); - // list all jobs recorded in zk - bool ListJobs(std::vector<JobInfo>* jobs); - // list running processes for a job - bool ListJobProcs(int job, std::vector<std::string>* procs); - // remove a job path in zk - bool Remove(int job); - // remove all job paths in zk - bool RemoveAllJobs(); - // remove all singa related paths in zk - bool CleanUp(); - - private: - const int kJobsNotRemoved = 10; - - bool CleanPath(const std::string& path, bool remove); - std::string ExtractClusterConf(const char* job_file); - - std::string host_ = ""; -#ifdef USE_ZOOKEEPER - int timeout_ = 30000; - ZKService zk_; -#endif -}; - -} // namespace singa - -#endif // SINGA_UTILS_JOB_MANAGER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/math_addr.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h deleted file mode 100644 index cf1d227..0000000 --- a/include/singa/utils/math_addr.h +++ /dev/null @@ -1,279 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_MATH_ADDR_H_ -#define SINGA_UTILS_MATH_ADDR_H_ - -extern "C" { -#include <cblas.h> -} -#ifdef USE_GPU -#include <cuda_runtime.h> -#include <cublas_v2.h> -#endif - -#include "singa/utils/singa_op.h" - -namespace singa { -template<typename Dtype> -Dtype cpu_asum(int n, const Dtype* A, int inc) { - return cblas_sasum(n, A, inc); -} - -template<typename Dtype> -void cpu_gemm(const Dtype * A, const Dtype * B, - const int m, const int n, const int k, const Dtype alpha, const Dtype beta, - const bool TranA, const bool TranB, Dtype * C) { - int lda, ldb; - CBLAS_TRANSPOSE tA, tB; - lda = TranA ? m : k; - ldb = TranB ? k : n; - tA = TranA ? CblasTrans : CblasNoTrans; - tB = TranB ? CblasTrans : CblasNoTrans; - cblas_sgemm(CblasRowMajor, tA, tB, m, n, k, alpha, A, lda, - B, ldb, beta, C, n); -} - -// should be very careful: -// m is the length of B, and n is the length of C , A is a n*m matrix -template<typename Dtype> -void cpu_gemv(const Dtype * A, const Dtype * B, const int m, const int n, - const Dtype alpha, const Dtype beta, const bool TranA, Dtype * C) { - CBLAS_TRANSPOSE tA; - tA = TranA ? 
CblasTrans : CblasNoTrans; - cblas_sgemv(CblasRowMajor, tA, m, n, alpha, A, n, B, 1, beta, C, 1); -} - -template<typename Dtype> -void cpu_axpy(const int n, const Dtype alpha, const Dtype * A, Dtype * B) { - cblas_saxpy(n, alpha, A, 1, B, 1); -} - -template<typename Dtype> -void cpu_scale(const int n, const Dtype alpha, Dtype * A) { - cblas_sscal(n, alpha, A, 1); -} - -template<typename Dtype> -void cpu_copy(const int n, const Dtype* A, Dtype *B) { - cblas_scopy(n, A, 1, B, 1); -} - -template<typename Dtype> -Dtype cpu_dot(const int n, const Dtype * A, const Dtype * B) { - Dtype sum = 0; - for (int i = 0 ; i < n ; i++) - sum += A[i] * B[i]; - return sum; -} - -// element-wise -template<typename Op, typename Dtype> -void cpu_e_f(const int n, const Dtype * A, Dtype * B) { - for (int i = 0 ; i < n ; i++) { - Op::Map(A[i], &B[i]); - } -} - -template<typename Op, typename Dtype> -void cpu_e_f(const int n, const Dtype * A, const Dtype * B, Dtype * C) { - for (int i = 0 ; i < n ; i++) { - Op::Map(A[i], B[i], &C[i]); - } -} -template<typename Op, typename Dtype> -void cpu_e_f(const int n, const Dtype alpha, const Dtype * A, Dtype * B) { - for (int i = 0 ; i < n ; i++) { - Op::Map(alpha, A[i], &B[i]); - } -} - -template<typename Op, typename Dtype> -void cpu_e_f(const int n, const Dtype alpha, const Dtype * A, const Dtype * B, - Dtype * C) { - for (int i = 0 ; i < n ; i++) { - Op::Map(alpha, A[i], B[i], &C[i]); - } -} -// element-wise generalized operation defined in Op - - -// matrix/vector expand/reduce - -template<typename Op, typename Dtype> -void cpu_reduce_f(const Dtype * A, const int m, const int n, Dtype * B) { - for (int i = 0 ; i < m ; i++) { - Op::Map(A+i*n, n, B[i]); - } -} -// reduce each row of A to an element of B e.g. the sum operation in softmax -template<typename Op, typename Dtype> -void cpu_expand_f(const Dtype * A, const int m, const int n, Dtype * B) { - for (int i = 0 ; i < m ; i++) { - Op::Map(A[i], n, B+i*n); - } -} - - -template<typename Dtype> -void cpu_softmax(int nb_rows, int nb_cols, const Dtype* A, Dtype* B) { - for (int i = 0; i < nb_rows; i++) { - const Dtype* dptr = A + i * nb_cols; - Dtype mmax = dptr[0]; - for (int x = 1; x < nb_cols; ++x) - if (mmax < dptr[x]) mmax = dptr[x]; - Dtype sum = 0.0f; - for (int x = 0; x < nb_cols; ++x) { - dptr[x] = std::exp(dptr[x] - mmax); - sum += dptr[x]; - } - for (int x = 0; x < nb_cols; ++x) { - dptr[x] /= sum; - } - } -} - - - -template<typename Dtype, typename URNG> -void cpu_sample_uniform(URNG& g, int n, Dtype low, Dtype high, Dtype* A) { - std::uniform_real_distribution<Dtype> distribution(low, high); - for (int i = 0; i < n; i++) - A[i] = distribution(g); -} - -template<typename Dtype, typename URNG> -void cpu_sample_gaussian(URNG& g, int n, Dtype mean, Dtype std, Dtype* A) { - std::normal_distribution<Dtype> distribution(mean, std); - for (int i = 0; i < n; i++) - A[i] = distribution(g); -} - -#ifdef USE_GPU -template<typename Dtype> -Dtype gpu_asum(cublasHandle_t handle, int n, const Dtype* A, int inc) { - Dtype result = 0.0; - cublasSasum(handle, n, A, inc, &result); - return result; -} - -template<typename Dtype> -void gpu_gemm(cublasHandle_t handle, const Dtype * A, const Dtype * B, - const int m, const int n, const int k, const Dtype alpha, const Dtype beta, - const bool TranA, const bool TranB, Dtype * C) { - int lda = TranA ? m : k; - int ldb = TranB ? k : n; - int ldc = n; - cublasOperation_t tA = (TranA == false) ? CUBLAS_OP_N : CUBLAS_OP_T; - cublasOperation_t tB = (TranB == false) ? 
CUBLAS_OP_N : CUBLAS_OP_T; - cublasSgemm(handle, tB, tA, n, m, k, &alpha, B, ldb, - A, lda, &beta, C, ldc); -} - -template<typename Dtype> -void gpu_gemv(cublasHandle_t handle, const Dtype * A, const Dtype * B, - const int m, const int n, const Dtype alpha, const Dtype beta, - const bool TranA, Dtype * C) { - int lda = n; - cublasOperation_t tA = (TranA == true) ? CUBLAS_OP_N : CUBLAS_OP_T; - cublasSgemv(handle, tA, n, m, &alpha , A, lda, B, 1, &beta, C, 1); -} - -template<typename Dtype> -void gpu_axpy(cublasHandle_t handle, const int n, const Dtype alpha, - const Dtype * A, Dtype * B) { - cublasSaxpy(handle, n, &alpha, A, 1, B, 1); -} - -template<typename Dtype> -void gpu_scale(cublasHandle_t handle, const int n, const Dtype alpha, - Dtype * A) { - cublasSscal(handle, n, &alpha, A, 1); -} - -template<typename Dtype> -Dtype gpu_dot(cublasHandle_t handle, const int n, const Dtype * A, - const Dtype * B) { - Dtype result = 0.0; - cublasSdot(handle, n, A, 1, B, 1, &result); - return result; -} - -// element-wise -template<typename Op, typename Dtype> -void gpu_e_f(const int n, const Dtype alpha, Dtype * A) { - Op::CudaMap(alpha, A, n); -} - -template<typename Op, typename Dtype> -void gpu_e_f(const int n, const Dtype * A, Dtype * B) { - Op::CudaMap(A, B, n); -} - -template<typename Op, typename Dtype> -void gpu_e_f(const int n, const Dtype * A, const Dtype * B, Dtype * C) { - Op::CudaMap(A, B, C, n); -} - -template<typename Op, typename Dtype> -void gpu_e_f(const int n, const Dtype alpha, const Dtype * A, Dtype * B) { - Op::CudaMap(alpha, A, B, n); -} - -template<typename Op, typename Dtype> -void gpu_e_f(const int n, const Dtype alpha, const Dtype beta, - const Dtype * A, const Dtype * B, Dtype * C) { - Op::CudaMap(alpha, beta, A, B, C, n); -} -// element-wise generalized operation defined in Op - -// matrix/vector expand/reduce - -template<typename Op, typename Dtype> -void gpu_reduce_f(const Dtype * A, const int m, const int n, Dtype * B) { - for (int i = 0 ; i < m ; i++) { - Op::CudaMap(A+i*n, n, B[i]); - } -} -// reduce each row of A to an element of B e.g. the sum operation in softmax -template<typename Op, typename Dtype> -void gpu_expand_f(const Dtype * A, const int m, const int n, Dtype * B) { - for (int i = 0 ; i < m ; i++) { - Op::CudaMap(A[i], n, B+i*n); - } -} - - -template<typename Dtype, typename URNG> -void gpu_sample_uniform(URNG g, int n, Dtype low, Dtype high, Dtype* A) { - curandGenerateUniform(g, A, n); -} - -template<typename Dtype, typename URNG> -void gpu_sample_gaussian(URNG g, int n, Dtype mean, Dtype std, Dtype* A) { - curandGenerateNormal(g, A, n, mean, std); -} - -// expand each element in A into a row of B -#endif // USE_GPU - -} // namespace singa -#endif // SINGA_UTILS_MATH_ADDR_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/math_blob.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h deleted file mode 100644 index abe7722..0000000 --- a/include/singa/utils/math_blob.h +++ /dev/null @@ -1,762 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. 
The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_MATH_BLOB_H_ -#define SINGA_UTILS_MATH_BLOB_H_ - -#include <vector> -#include <algorithm> -#include <thread> -#include "singa/utils/blob.h" -#include "singa/utils/singa_op.h" -#include "singa/utils/math_addr.h" -#include "singa/utils/singleton.h" -#include "singa/utils/context.h" - -namespace singa { - -#define NO_GPU LOG(FATAL) << "Not compiled with GPU"; -/** - * \file math_blob.h is not tested thorough. - * Only GEMM() and MMDot() MVSumRow() andMVAddRow() are used now. - */ -/************* BLAS level 1 *****************/ -/** - * Scale each element of A with alpha, and put the result into A. - * Ai = alpha*Ai - * Use blas scale internally. - */ -template<typename Dtype> -void Scale(Dtype alpha, Blob<Dtype> * B) { - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_scale(B->count(), alpha, B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_scale(context->cublas_handle(device), B->count(), alpha, - B->mutable_gpu_data()); -#else - NO_GPU; -#endif - } -} - -/** - * Element-wise operation: Bi = alpha*Ai+Bi. A and B should have the same size - */ -template<typename Dtype> -void AXPY(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) { - CHECK_EQ(A.count(), B->count()); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_axpy(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_axpy(context->cublas_handle(device), A.count(), alpha, A.gpu_data(), - B->mutable_gpu_data()); -#else - NO_GPU; -#endif - } -} - -/************* BLAS level 2 *****************/ -/** - * Matrix vector multiplication, C = alpha A(.T) * B + beta C. - * Loose shape checking: - * - dim of A >=2 - * - row of A is shape(0) (no transpose) - * - column of A(.T) == B.count() - * - rows of A(.T) == C.count() - * - * @param[in] alpha - * @param[in] beta - * @param[in] A, matrix - * @param[in] B, vector - * @param[in, out] C, vector - */ -template<typename Dtype> -void GEMV(Dtype alpha, Dtype beta, const Blob<Dtype>& A, - const Blob<Dtype>& B, Blob<Dtype>* C) { - CHECK_EQ(A.shape().size(), 2); - int a1, a2, m, n; - a1 = A.transpose() ? A.count() / A.shape(0) : A.shape(0); - a2 = A.transpose() ? 
A.shape(0) : A.count() / A.shape(0); - m = B.count(); - n = C->count(); - CHECK_EQ(a2, m) << "# columns of A(.T) must = length of B"; - CHECK_EQ(a1, n) << "# rows of A(.T) must = length of C"; - - bool TranA = A.transpose(); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_gemv(A.cpu_data(), B.cpu_data(), m, n, alpha, beta, TranA, - C->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_gemv(context->cublas_handle(device), A.gpu_data(), B.gpu_data(), m, n, - alpha, beta, TranA, C->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } -} -/** - * Matrix vector multiplication, C = A(.T) * B, transpose is considered. - * Loose shape checking: - * - dim of A >=2 - * - A.count() % B.count() == 0 - * - B.count() == C.count() - * - * @param[in] A input matrix - * @param[in] B input vector - * @param[out] C output vector - */ -template <typename Dtype> -void MVDot(const Blob<Dtype>& A, const Blob<Dtype>& B, - Blob<Dtype>* C) { - GEMV(Dtype(1), Dtype(0), A, B, C); -} - -/************* BLAS level 3 *****************/ -/** - * Matrix multiplication, C = alpha A*B + beta C, A, B and C are matrix. - * - * Tranpose is considered for A and B. - * Loose shape checking: - * - the first dimension is row (no transpose) or col (with transpose) size - * - shapes match for matrix multiplication - * - * @param[in] alpha - * @param[in] beta - * @param[in] A, matrix - * @param[in] B, matrix - * @param[in, out] C, matrix - */ -template <typename Dtype> -void GEMM(Dtype alpha, Dtype beta, const Blob<Dtype>& A, const Blob<Dtype>& B, - Blob<Dtype> * C) { - CHECK_GE(A.shape().size(), 2); - CHECK_GE(B.shape().size(), 2); - CHECK_GE(C->shape().size(), 2); - int a1, a2, b1, b2, m, n; - CHECK(!C->transpose()); - a1 = A.transpose() ? A.count() / A.shape(0) : A.shape(0); - a2 = A.count() / a1; - b1 = B.transpose() ? B.count() /B.shape(0) : B.shape(0); - b2 = B.count() / b1; - m = C->shape(0); - n = C->count() / m; - CHECK_EQ(a2, b1); - CHECK_EQ(a1, m); - CHECK_EQ(b2, n); - - int k = a2; - bool TranA = A.transpose(); - bool TranB = B.transpose(); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, k, alpha, beta, TranA, TranB, - C->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(), - m, n, k, alpha, beta, TranA, TranB, C->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } -} -/** - * Matrix multiplication, C = A(.T) * B(.T), transpose is considered. - * Strict shape checking: - * - all are matrix - * - shapes match for matrix multiplication - * - * @param[in] A input matrix - * @param[in] B input matrix - * @param[out] C output matrix - */ -template <typename Dtype> -void MMDot(const Blob<Dtype>& A, const Blob<Dtype>& B, - Blob<Dtype>* C) { - GEMM(Dtype(1), Dtype(0), A, B, C); -} - - -/*********************** Inner and Outer product****************************/ -/** - * Inner product for two vectors. - * Loose shape checking, A.count() == B.count. - * - * @param[in] A, input vector (shape checking using A.count()). - * @param[in] B, input vector (shape checking using B.count()). - * @return inner product value. 
- */ -template <typename Dtype> -Dtype VVDot(const Blob<Dtype> & A, const Blob<Dtype> & B) { - Dtype res = 0; - CHECK_EQ(A.count(), B.count()); - int n = A.count(); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - res = cpu_dot(n, A.cpu_data(), B.cpu_data()); - } else { -#ifdef USE_GPU - res = gpu_dot(context->cublas_handle(device), n, A.gpu_data(), - B.gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } - return res; -} - -/** - * Outer product, C = A ** B, transpose is disabled. - * Loose shape checking, A.count() * B.count() == C.count() - * - * @param[in] A, input vector - * @param[in] B, input vector - * @param[out] C, output matrix - */ -template <typename Dtype> -void OuterProduct(const Blob<Dtype>& A, const Blob<Dtype>& B, Blob<Dtype> * C) { - CHECK(!C->transpose()); // do not support C.T now. - - int m = A.count(); - int n = B.count(); - CHECK_EQ(C->count(), m * n); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, Dtype(1), Dtype(0), false, - false, C->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(), - m, n, 1, Dtype(1), Dtype(0), false, false, C->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } -} -/*********************** Element-wise functions ***********************/ -/** - * Apply the function from Op for each element in A and put the result into B, - * i.e., Bi = Op(Ai). - * Loose shape checking, A.count() == B.count(). - */ -template<typename Op, typename Dtype> -void Map(const Blob<Dtype> & A, Blob<Dtype> * B) { - CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size"; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_e_f<Op>(A.count(), A.cpu_data(), B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_e_f<Op>(A.count(), A.gpu_data(), B->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } -} - -/** - * Apply the function from Op for each element in A and B, and put the result - * into C, i.e., Ci = Op(Ai, Bi). - * Loose shape checking, A, B and C are of the same size. - */ -template<typename Op, typename Dtype> -void Map(const Blob<Dtype> & A, const Blob<Dtype> & B, Blob<Dtype> * C) { - CHECK_EQ(A.count(), B.count()) << "Blobs must have the same size"; - CHECK_EQ(A.count(), C->count()) << "Blobs must have the same size"; - // cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data()); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_e_f<Op>(A.count(), A.gpu_data(), B.gpu_data(), C->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } -} - -/** - * Bi = Op(alpha, Ai) - * Loose shape checking, A.count() == B.count(). 
- */ -template<typename Op, typename Dtype> -void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>* B) { - CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size"; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_e_f<Op>(A.count(), alpha, A.gpu_data(), B->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } -} -/** - * Ci = Op(alpha, Ai, Bi) - * Loose shape checking, A, B and C are of the same size. - */ -template<typename Op, typename Dtype> -void Map(Dtype alpha, const Blob<Dtype>& A, const Blob<Dtype>& B, - Blob<Dtype>* C) { - CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size"; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->cpu_data(), - C->mutable_cpu_data()); - } else { - // TODO(wangwei) implement gpu version. - NO_GPU; - } -} - -/** - * Currently use std::copy which has shown better performance than memcpy. - * http://stackoverflow.com/questions/4707012/c-memcpy-vs-stdcopy - * TODO(wangwei) test blas copy vs std::copy. - * - * Loose shape checking, A.count() == B.count(). - */ -template<typename Dtype> -void Copy(const Blob<Dtype>& A, Blob<Dtype>* B) { - CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size"; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - std::copy(A.cpu_data(), A.cpu_data() + A.count(), B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - CUDA_CHECK(cudaMemcpy(static_cast<Dtype*>(B->mutable_gpu_data()), - A.gpu_data(), sizeof(Dtype) * A.count(), cudaMemcpyDefault)); -#else - NO_GPU; -#endif - } -} - - -/** - * B = alpha + A - * Implemented using Copy and AXPY. - */ -template<typename Dtype> -void Add(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) { - Map<singa::op::Add<Dtype>, Dtype>(alpha, A, B); -} - -/** - * C = A + B - * Implemented using Copy and AXPY. - */ -template<typename Dtype> -void Add(const Blob<Dtype> & A, const Blob<Dtype> & B, - Blob<Dtype> * C) { - Copy(A, C); - AXPY(Dtype(1), B, C); -} - -/** - * B = alpha - A - * Implemented using Copy and AXPY. - */ -template<typename Dtype> -void Sub(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype>* B) { - Map<singa::op::Sub<Dtype>, Dtype>(alpha, A, B); -} - -/** - * C = A - B - * Implemented using Copy and AXPY. - */ -template<typename Dtype> -void Sub(const Blob<Dtype> & A, const Blob<Dtype> & B, - Blob<Dtype> * C) { - Copy(A, C); - AXPY(Dtype(-1), B, C); -} - -/** - * C = A * B, implemented using - * Map(const Blob<Dtype>&, const Blob<Dtype>&, Blob<Dtype>*). - */ -template<typename Dtype> -void Mult(const Blob<Dtype> & A, const Blob<Dtype> & B, - Blob<Dtype> * C) { - Map<singa::op::Mult<Dtype>, Dtype>(A, B, C); - // TODO(wangwei) use MKL's vector func -} - -/** - * C = A / B, implemented using - * Map(const Blob<Dtype>&, const Blob<Dtype>&, Blob<Dtype>*). 
- */ -template<typename Dtype> -void Div(const Blob<Dtype> & A, const Blob<Dtype> & B, - Blob<Dtype> * C) { - Map<singa::op::Div<Dtype>, Dtype>(A, B, C); - // TODO(wangwei) use MKL's vector func -} -/** - * B = sqrt(A) - */ -template<typename Dtype> -void Sqrt(const Blob<Dtype> & A, Blob<Dtype>* B) { - Map<singa::op::Sqrt<Dtype>, Dtype>(A, B); -} -/** - * B = square(A) - */ -template<typename Dtype> -void Square(const Blob<Dtype> & A, Blob<Dtype>* B) { - Map<singa::op::Square<Dtype>, Dtype>(A, B); -} -/** - * B = exp(A) - */ -template<typename Dtype> -void Exp(const Blob<Dtype> & A, Blob<Dtype>* B) { - Map<singa::op::Exp<Dtype>, Dtype>(A, B); -} -/** - * B = log(A) - */ -template<typename Dtype> -void Log(const Blob<Dtype>& A, Blob<Dtype>* B) { - Map<singa::op::Log<Dtype>, Dtype>(A, B); -} -/** - * B = tanh(A) - */ -template<typename Dtype> -void Tanh(const Blob<Dtype>& A, Blob<Dtype>* B) { - Map<singa::op::Tanh<Dtype>, Dtype>(A, B); -} -/*************************1D<-->2D op/transform***************************/ -/** - * Add A to each column of B, i.e., Bij = alpha*Ai + beta*Bij - * Loose shape checking, B.count() % A.count() == 0. - * # columns of B = B.count() / A.count(). - */ -template<typename Dtype> -void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) { - if (B->transpose()) { - B->set_transpose(false); - MVAddRow(alpha, beta, A, B); - B->set_transpose(true); - } else { - CHECK_EQ(B->count() % A.count(), 0) << "#col of B not match length of A"; - int m = A.count(), n = B->count() / m; - Blob<Dtype> one(n); - one.SetValue(1); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_gemm(A.cpu_data(), one.cpu_data(), m, n, 1, alpha, beta, false, false, - B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m, - n, 1, alpha, beta, false, false, B->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } - } -} -/** - * Add A to each column of B, i.e., Bij = Ai + Bij - * Loose shape checking, B.count() % A.count() == 0. - * # columns of B = B.count() / A.count(). - */ -template<typename Dtype> -void MVAddCol(const Blob<Dtype> & A, Blob<Dtype>* B) { - MVAddCol(Dtype(1), Dtype(1), A, B); -} - -/** - * Add A to each row of B, i.e., Bij = alpha*Aj + beta*Bij - * Loose shape checking, B.count() % A.count() == 0. - * # rows of B = B.count() / A.count(). - */ -template<typename Dtype> -void MVAddRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) { - if (B->transpose()) { - B->set_transpose(false); - MVAddCol(alpha, beta, A, B); - B->set_transpose(true); - } else { - CHECK_EQ(B->count() % A.count(), 0) << "#col of B not match length of A"; - int n = A.count(), m = B->count() / n; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - Blob<Dtype> one(m); - one.SetValue(1); - cpu_gemm(one.cpu_data(), A.cpu_data(), m, n, 1, alpha, beta, - false, false, B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - singa_gpu_add_vec_row(A.gpu_data(), B->gpu_data(), B->mutable_gpu_data(), - m, n, n); -#else - NO_GPU; -#endif // USE_GPU - } - } -} -/** - * Add A to each row of B, i.e., Bij = Aj + Bij - * Loose shape checking, B.count() % A.count() == 0. - * # rows of B = B.count() / A.count(). 
- */ -template<typename Dtype> -void MVAddRow(const Blob<Dtype> & A, Blob<Dtype>* B) { - MVAddRow(Dtype(1), Dtype(1), A, B); -} - -/** - * Copy A to each column of B, i.e., Bij = Ai - * Loose shape checking, B.count() % A.count() == 0, - * # columns of B = B.count() / A.count(). - */ -template<typename Dtype> -void RepmatCol(const Blob<Dtype> & A, Blob<Dtype> * B) { - MVAddCol(Dtype(1), Dtype(0), A, B); -} - -/** - * Copy A to each row of B, i.e., Bij = Aj - * Loose shape checking, B.count() % A.count() == 0, - * # rows of B = B.count() / A.count(). - */ -template<typename Dtype> -void RepmatRow(const Blob<Dtype> & A, Blob<Dtype> * B) { - MVAddRow(Dtype(1), Dtype(0), A, B); -} - -/** - * Sum all columns of matrix A to a column vector B, - * i.e., Bi = \sum_j {alpha*Aij}+beta*Bi - * Loose shape checking, A.count() % B.count() == 0. - * # columns of A = A.count() / B.count(). - */ -template<typename Dtype> -void MVSumCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) { - CHECK_EQ(A.count() % B->count(), 0) << "length of B must = # of cols of A"; - int m = B->count(), n = A.count() / m; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - Blob<Dtype> one(n); - one.SetValue(1); - cpu_gemm(A.cpu_data(), one.cpu_data(), m, 1, n, alpha, beta, - A.transpose(), false, B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - singa_gpu_sum_col(A.gpu_data(), B->mutable_gpu_data(), m, n, n); -#else - NO_GPU; -#endif // USE_GPU - } -} - -/** - * Sum all rows of matrix A to a row vector B, - * i.e., Bj = \sum_i {alpha*Aij}+beta*Bj - * Loose shape checking, A.count() % B.count() == 0. - * # rows of A = A.count() / B.count(). - */ -template<typename Dtype> -void MVSumRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) { - CHECK_EQ(A.count() % B->count(), 0) << "length of B must = # of cols of A"; - int n = B->count(), m = A.count() / n; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - Blob<Dtype> one(m); - one.SetValue(1); - cpu_gemm(one.cpu_data(), A.cpu_data(), 1, n, m, alpha, beta, false, - A.transpose(), B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - singa_gpu_sum_row(A.gpu_data(), B->mutable_gpu_data(), m, n, n); -#else - NO_GPU; -#endif // USE_GPU - } -} - -/** - * Reduce each row of A to an element of B. - * Loose shape checking, A.count() % B.count() == 0. - * # columns of A = A.count() / B.count(). - */ -template<typename Op, typename Dtype> -void Reduce2D(const Blob<Dtype> & A, Blob<Dtype> * B) { - CHECK_EQ(A.count() % B->count(), 0) << "Row size not match B length"; - int m = B->count(), n = A.count() / m; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_reduce_f<Op>(A.cpu_data(), m, n, B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_reduce_f<Op>(A.gpu_data(), m, n, B->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } -} -/** - * Duplicate each element of A into a row of B. - * Loose shape checking, B.count() % A.count() == 0. - * # columns of B = B.count() / A.count(). 
- */ -template<typename Op, typename Dtype> -void Expand2D(const Blob<Dtype> & A, Blob<Dtype> * B) { - CHECK_EQ(B->count() % A.count(), 0) << "Row size of B not match length of A"; - int m = A.count(), n = B->count() / m; - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_expand_f<Op>(A.cpu_data(), m, n, B->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_expand_f<Op>(A.gpu_data(), m, n, B->mutable_gpu_data()); -#else - NO_GPU; -#endif // USE_GPU - } -} - -/** - * Average the absolute values. - */ -template<typename Dtype> -Dtype Asum(const Blob<Dtype>& A) { - if (A.count() == 0) return Dtype(0); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - Dtype ret = Dtype(0); - if (device < 0) { - ret = cpu_asum(A.count(), A.cpu_data(), 1) / A.count(); - } else { -#ifdef USE_GPU - ret = gpu_asum(context->cublas_handle(device), A.count(), A.gpu_data(), 1) - / A.count(); -#else - NO_GPU; -#endif - } - return ret; -} - - -/*************Random Sample***************/ -template<typename Dtype> -void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) { - auto context = Singleton<Context>::Instance(); - const auto& thread = std::this_thread::get_id(); - int device = context->device_id(thread); - if (device < 0) { - cpu_sample_uniform(*context->rand_generator(thread), A->count(), low, high, - A->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_sample_uniform(context->curand_generator(thread), A->count(), low, high, - A->mutable_gpu_data()); -#else - NO_GPU; -#endif - } -} - -template<typename Dtype> -void SampleGaussian(Dtype mean, Dtype std, Blob<Dtype>* A) { - auto context = Singleton<Context>::Instance(); - const auto& thread = std::this_thread::get_id(); - int device = context->device_id(thread); - if (device < 0) { - cpu_sample_gaussian(*context->rand_generator(thread), A->count(), mean, std, - A->mutable_cpu_data()); - } else { -#ifdef USE_GPU - gpu_sample_gaussian(context->curand_generator(thread), A->count(), - mean, std, A->mutable_gpu_data()); -#else - NO_GPU; -#endif - } -} - -/************** Other functions ****************/ -template<typename Dtype> -void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) { - CHECK_GT(nb_rows, 0); - CHECK_EQ(A.count() % nb_rows, 0); - CHECK_EQ(A.count(), B->count()); - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - cpu_softmax(nb_rows, A.count() / nb_rows, A.cpu_data(), - B->mutable_cpu_data()); - } else { - // TODO(wangwei) implement the GPU version. 
- NO_GPU; - } -} - -template<typename Dtype> -void Zero(Blob<Dtype>* B) { - auto context = Singleton<Context>::Instance(); - int device = context->device_id(std::this_thread::get_id()); - if (device < 0) { - B->SetValue(0); - } else { -#ifdef USE_GPU - cudaMemset(B->mutable_gpu_data(), 0, B->count() * sizeof(float)); -#else - NO_GPU; -#endif // USE_GPU - } -} -} // end of namespace singa - -#endif // SINGA_UTILS_MATH_BLOB_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/math_kernel.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/math_kernel.h b/include/singa/utils/math_kernel.h deleted file mode 100644 index 0239d3d..0000000 --- a/include/singa/utils/math_kernel.h +++ /dev/null @@ -1,88 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ -#ifndef SINGA_UTILS_MATH_KERNEL_H_ -#define SINGA_UTILS_MATH_KERNEL_H_ - -namespace singa { - -extern "C" { - void singa_gpu_softmaxloss_forward(int n, int dim, const float *prob, - const int *label, float *loss); - - void singa_gpu_softmaxloss_backward(int n, int dim, float scale, - const int *label, float *grad); - - void singa_gpu_sum_vec(float *data, float *sum , int n); - - void singa_gpu_sum_col(const float *src_mat_data, float *dst_vec_data, - int rows, int cols, int stride); - - void singa_gpu_sum_row(const float *src_mat_data, float *dst_vec_data, - int rows, int cols, int stride); - - void singa_gpu_add_vec_row(const float *src_vec_data, - const float *src_mat_data, float *des_mat_data, - int rows, int cols, int stride); - - void singa_gpu_exp(const float *src_data, float *des_data, int n); - - void singa_gpu_log(const float *src_data, float *des_data, int n); - - void singa_gpu_sigmoid(const float *src_data, float *des_data, int n); - - void singa_gpu_sigmoid_grad(const float *src_data, float *des_data, int n); - - void singa_gpu_relu(const float *src_data, float *des_data, int n); - - void singa_gpu_relu_grad(const float *src_data, float *des_data, int n); - - void singa_gpu_tanh(const float *src_data, float *des_data, int n); - - void singa_gpu_tanh_grad(const float *src_data, float *des_data, int n); - - void singa_gpu_softplus(const float *src_data, float *des_data, int n); - - void singa_gpu_softplus_grad(const float *src_data, float *des_data, int n); - - void singa_gpu_square(const float *src_data, float *des_data, int n); - - void singa_gpu_square_grad(const float *src_data, float *des_data, int n); - - void singa_gpu_sqrt(const float *src_data, float *des_data, int n); - - void singa_gpu_pow(const float *src_data_a, const float *src_data_b, - float *des_data, int n); - 
- void singa_gpu_mult(const float *src_data_a, const float *src_data_b, - float *des_data, int n); - - void singa_gpu_div(const float *src_data_a, const float *src_data_b, - float *des_data, int n); - - void singa_gpu_set_value(float *data, float value, int n); - - void singa_gpu_threshold(const float *src_data, float *des_data, - float alpha, int n); -}; - -} // namespace singa - -#endif // SINGA_UTILS_MATH_KERNEL_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/param.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/param.h b/include/singa/utils/param.h deleted file mode 100644 index 319f2b4..0000000 --- a/include/singa/utils/param.h +++ /dev/null @@ -1,407 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_PARAM_H_ -#define SINGA_UTILS_PARAM_H_ - -#include <memory> -#include <string> -#include <vector> - -#include "singa/comm/msg.h" -#include "singa/proto/job.pb.h" -#include "singa/utils/blob.h" - -namespace singa { -using std::vector; -/** - * Base parameter generator which initializes parameter values. - */ -class ParamGenerator { - public: - static ParamGenerator* Create(const ParamGenProto& proto); - - virtual ~ParamGenerator() {} - - virtual void Init(const ParamGenProto& proto) { proto_ = proto; } - virtual void Fill(Blob<float>* data); - - protected: - ParamGenProto proto_; -}; - -class GaussianGen : public ParamGenerator { - public: - void Fill(Blob<float>* data) override; -}; - -class GaussianSqrtFanInGen : public GaussianGen { - public: - void Fill(Blob<float>* data) override; -}; - -class UniformGen : public ParamGenerator { - public: - void Fill(Blob<float>* data) override; -}; - -class UniformSqrtFanInGen : public UniformGen { - public: - void Fill(Blob<float>* data) override; -}; - -class UniformSqrtFanInOutGen : public UniformGen { - public: - void Fill(Blob<float>* data) override; -}; - -/** - * Base parameter class. - * - * The Param object is a set of parameters, e.g., the (sub) weight matrix or - * (sub) bias vector. - * - * It has a gradient Blob and a data Blob for gradients and parameter values. - * Since some layers (or neuralnets) share parameter values, the data Blob is a - * shared pointer which can be assigned to many Param objects' data field. - * - * It provides access methods like data(), grad(). It also provides functions - * for generating and parsing messages to transfer the Param - * objects among worker-worker, worker-server and server-server. - * - * Param objects are of different sizes, which makes it hard to achieve - * load-balance among servers.
Hence, we slice large Param objects into small - * pieces. At the server side, one slice is a Param object. - */ -class Param { - public: - /** - * Create an instance of (sub) Param class based on the type from the - * configuration. - * - * @param[in] conf configuration - * @return a pointer to an instance - */ - static Param* Create(const ParamProto& conf); - - /** - * Try to slice the Param objects (from a neural net) into a given number of - * servers (groups) evenly. This is to achieve load-balance among servers. - * - * It does not change the Param objects, but just computes the length of each - * slice. - * - * @param num number of servers (groups) for maintaining the Param objects. - * @param params all Param objects from a neural net. - * @return the length of each slice. - */ - static const vector<int> ComputeSlices(int num, const vector<Param*>& params); - /** - * It computes the length of each slice and slices the Param objects by adding - * the slicing information into every Param object. - * - * @copydetails ComputeSlices() - */ - static void SliceParams(int num, const vector<Param*>& params); - - Param() {} - virtual ~Param() {} - void Init(const ParamProto& proto) { proto_ = proto; } - /** - * Set up the Param object. - * - * @param conf param configuration, including learning rate multiplier, etc. - * @param shape one value per dimension - */ - virtual void Setup(const std::vector<int>& shape); - /* - * Fill the values according to the init method, e.g., Gaussian distribution. - * - * @param version initial version - */ - virtual void InitValues(); - virtual void InitValues(int version); - /** - * Share the data blob from other Param objects. - * - * @param other the Param object whose owner owns the data blob - * @param cpu_only if true, share only cpu memory (used for training with - * multi-gpu cards); else, share both cpu and gpu memory. - */ - void ShareDataFrom(Param* other, bool cpu_only); - /** - * Share both data and grad from another Param. - */ - void ShareFrom(Param* other); - /** - * Init param values from checkpoint blob. - */ - void FromProto(const BlobProto& blob); - void FromProto(const std::string str); - /** - * Dump param values to blob. - */ - void ToProto(BlobProto* blob); - /** - * Add a slice - * - * @param slice_id - * @param size num of floats for this slice - */ - void AddSlice(int slice_id, int size); - /** - * Scale the learning rate when updating parameters in the Param object - */ - inline float lr_scale() const { return proto_.lr_scale(); } - /** - * Scale the weight decay when updating parameters in the Param object - */ - inline float wd_scale() const { return proto_.wd_scale(); } - /** - * Parameter name used for Param re-use in other models or sharing between - * layers - */ - inline const std::string& name() const { return proto_.name(); } - inline void set_name(const std::string& name) { proto_.set_name(name); } - /** - * If it shares data from others, then owner is the id of that Param, - * otherwise it is its own id. - */ - inline int owner() const { return proto_.owner(); } - /** - * IDs start from 0 and are ordered for all Params from the same neural net. - */ - inline int id() const { return proto_.id(); } - /** - * Set ID - */ - inline void set_id(int id) { - proto_.set_id(id); - proto_.set_owner(id); - } - inline int version() const { return version_; } - inline void set_version(int v) { version_ = v; } - /** - * @return the version of the Param when the last Update request was issued.
- */ - inline int last_version() const { return last_version_; } - inline void set_last_version(int v) { last_version_ = v; } - - /** - * @return the sharing Param name which is configured by users in the conf file. - */ - inline const std::string& share_from() const { return proto_.share_from(); } - /** - * @return num of parameters in this Param obj. - */ - inline const std::vector<int>& shape() const { return data_.shape(); } - inline int size() const { return data_.count(); } - inline const Blob<float>& data() const { return data_; } - inline Blob<float>* mutable_data() { return &data_; } - inline const Blob<float> &grad() const { return grad_; } - inline Blob<float> *mutable_grad() { return &grad_; } - inline float* mutable_cpu_data() { return data_.mutable_cpu_data(); } - inline float* mutable_cpu_grad() { return grad_.mutable_cpu_data(); } - inline float* mutable_cpu_history() { return history_.mutable_cpu_data(); } - inline float* mutable_cpu_update() { return update_.mutable_cpu_data(); } - /** - * @return slice start ID - */ - inline int slice_start() const { return slice_start_; } - inline int num_slices() const { return num_slices_; } - - /** - * Below are message/request related functions. - * The basic communication workflows are as follows: - *------------------------------------------------------------------------ - * |Put |Get |Update |Sync - *------------------------------------------------------------------------ - * Generate|(stub) |(stub) |(stub) |(server) - * Message |GenPutMsg |GenGetMsg |GenUpdateMsg |GenSyncMsg - *------------------------------------------------------------------------ - * Handle |(server) |(server) |(server) |(server) - * Message |HandlePutMsg|HandleGetMsg |ParseUpdateMsg |HandleSyncMsg - * | | |GenUpdateResMsg | - *------------------------------------------------------------------------ - * Handle | |(stub) |(stub) |(server) - * Response| |ParseGetResMsg|ParseUpdateResMsg|ParseSyncResMsg - *------------------------------------------------------------------------ - */ - - /** - * Generate the message for a put request, i.e., put parameters to a server - * - * This function is called at worker/stub side. - * @param copy decides whether to copy the parameter values from the server. - * @param slice_idx index of the slice from which the message is generated. - * @return generated message without setting src, dst, target fields. - */ - virtual Msg* GenPutMsg(bool copy, int slice_idx); - /** - * Generate the message for a get request, i.e., get parameters from a server - * \copydetails GenPutMsg(bool, int); - */ - virtual Msg* GenGetMsg(bool copy, int slice_idx); - /** - * Generate the message for an update request, i.e., pass info to server for - * parameter update. - * \copydetails GenPutMsg(bool, int); - */ - virtual Msg* GenUpdateMsg(bool copy, int slice_idx); - /** - * Generate the message for a synchronization request between server groups. - * - * This function is called at server side where the Param is actually a slice - * of an original Param object. - * */ - virtual Msg* GenSyncMsg(int offset, int size); - /** - * Server handling function for put request. - * - * @param msg request - * @param reserve if true reserve the msg space for the calling function; - * otherwise the msg should be freed inside the function. - * @return response message - */ - virtual Msg* HandlePutMsg(Msg** msg, bool reserve); - /** - * Server handling function for get request.
- * - * \copydetails HandleGetMsg(Msg**, bool reserve) - */ - virtual Msg* HandleGetMsg(Msg** msg, bool reserve); - /** - * Server parsing function for update requests. - * \copydetails GenUpdateResponseMsgs(const std::vector<Msg*>& msgs); - */ - virtual void ParseUpdateMsgs(const std::vector<Msg*>& msgs); - /** - * Generate the messages to respond to the update requests. - * - * This function is called at the server side, where the Param is actually a - * slice of an original Param object. - * - * @param msgs for synchronous training, there may be multiple procs in - * which workers share the same Param (slice) objects. Their update requests - * are buffered and handled together. For asynchronous training, there is only - * one request in msgs. - * @return response messages - */ - virtual const std::vector<Msg*> - GenUpdateResponseMsgs(std::vector<Msg*>* msgs, bool reserve); - /** - * Server handling function for synchronization message - * - * \copydetails HandleGetMsg(Msg**, bool reserve) - */ - virtual Msg* HandleSyncMsg(Msg** msg, bool reserve); - /** - * Worker/Stub parsing function for get response. - * - * @param msg - * @param slice_idx index for the slice - */ - virtual int ParseGetResponseMsg(Msg* msg, int slice_idx); - /** - * Worker/Server parsing function for update response - * - * \copydetails ParseGetResponseMsg(Msg**, int); - */ - virtual int ParseUpdateResponseMsg(Msg* msg, int slice_idx); - /** - * Server parsing function for synchronization response. - * - * \copydetails ParseGetResponseMsg(Msg** , int); - */ - virtual int ParseSyncResponseMsg(Msg* msg, int slice_idx); - - protected: - /** - * Implement the common code of ParseGetResponseMsg and ParseUpdateResponseMsg - * \copydetails ParseSyncResponseMsg(Msg* msg, int slice_idx); - */ - void ParseResponseMsg(Msg* msg, int slice_idx); - - protected: - //!< param version updated by the Update/Sync/Get response - //!< only the owner param is initialized. - int version_ = -1; - //!< param version before last Update/Sync/Get request, set from version_ - int last_version_ = -1; - //!< the global ID of the first slice - int slice_start_ = 0; - //!< total num of slices for this Param obj - int num_slices_ = 0; - // offset and size of each slice - std::vector<int> slice_offset_; - std::vector<int> slice_size_; - // for debug. Put requests have no feedback, so we do not track their pending status - std::vector<bool> pending_get_; - std::vector<bool> pending_update_; - int num_pending_requests_ = 0; - // data, gradient, history gradient of this parameter - Blob<float> data_, grad_, history_, update_; - ParamProto proto_; -}; - -/** - * ParamEntry is used for aggregating gradients of Params shared by workers from - * the same group. - * - * For each worker group, every unique Param object has a ParamEntry object. - * Param objects sharing the same values are associated with the same - * ParamEntry. - */ -class ParamEntry { - public: - ParamEntry() {} - ParamEntry(int total, Param* p); - /** - * Associate the counter with a Param object.
- * - * @param p - * @param local 1 if it is used by workers in this process, 0 otherwise - */ - void AddParam(bool local, Param* p); - int next_version = -1; // next_version & num_update are directly used by stub - int num_update = 0; - int num_local = 0; //!< # local workers using the shared parameter - int num_total = 0; //!< # total workers using the shared parameter - //!< Shares are deleted by neuralnet's destructor - std::vector<Param*> shares; -}; - -inline int ParamTrgt(int param_id, int slice_id) { - return (param_id << 16) | slice_id; -} - -inline int ParamID(int param_trgt) { - return param_trgt >> 16; -} - -inline int SliceID(int param_trgt) { - static const int mask = (1 << 16) -1; - return param_trgt & mask; -} - -} // namespace singa - -#endif // SINGA_UTILS_PARAM_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/singa_op.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/singa_op.h b/include/singa/utils/singa_op.h deleted file mode 100644 index 7499eb1..0000000 --- a/include/singa/utils/singa_op.h +++ /dev/null @@ -1,299 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License.
-* -*************************************************************/ - -#ifndef SINGA_UTILS_SINGA_OP_H_ -#define SINGA_UTILS_SINGA_OP_H_ - -#include <cmath> -#include <algorithm> - -#ifdef USE_GPU -#include <cuda_runtime.h> -#include <cublas_v2.h> -#include "singa/utils/math_kernel.h" -#endif // USE_GPU - -namespace singa { - -namespace op { - -/** - * b = e^a - */ -template<typename Dtype> -struct Exp { - inline static void Map(const Dtype & a, Dtype * b) { - *b = exp(a); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_exp(a, b, n); - } -#endif // USE_GPU -}; -/** - * b = log(a), base is e - */ -template<typename Dtype> -struct Log { - inline static void Map(const Dtype & a, Dtype *b) { - *b = log(a); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_log(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Sigmoid { - inline static void Map(const Dtype & a, Dtype * b) { - *b = 1.0f / (1.0f + expf(-a)); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_sigmoid(a, b, n); - } -#endif // USE_GPU -}; -template<typename Dtype> -struct SigmoidGrad { - inline static void Map(const Dtype & a, Dtype * b) { - *b = a * (1.0f - a); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_sigmoid_grad(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Relu { - inline static void Map(const Dtype & a, Dtype * b) { - *b = std::max(a, 0.0f); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_relu(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct ReluGrad { - inline static void Map(const Dtype & a, Dtype * b) { - *b = a > 0 ? 
1 : 0; - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_relu_grad(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Tanh { - inline static void Map(const Dtype & a, Dtype * b) { - *b = tanhf(a); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_tanh(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct TanhGrad { - inline static void Map(const Dtype & a, Dtype * b) { - *b = 1 - a * a; - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_tanh_grad(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Softplus { - inline static void Map(const Dtype & a, Dtype * b) { - *b = logf(1 + expf(a)); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_softplus(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct SoftplusGrad { - inline static void Map(const Dtype & a, Dtype * b) { - *b = 1.0f / (1.0f + expf(-a)); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_softplus_grad(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Square { - inline static void Map(const Dtype & a, Dtype * b) { - *b = a * a; - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_square(a, b, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct SquareGrad { - inline static void Map(const Dtype & a, Dtype * b) { - *b = 2 * sqrt(a); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_square_grad(a, b, 1, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Sqrt { - inline static void Map(const Dtype & a, Dtype * b) { - *b = sqrt(a); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, Dtype * b, int n) { - singa::singa_gpu_sqrt(a, b, n); - } -#endif // USE_GPU -}; - -/*********************************************************************/ -/** - * c = pow(a, b), i.e., c = a^b - */ -template<typename Dtype> -struct Pow { - inline static void Map(const Dtype & a, const Dtype &b, Dtype * c) { - *c = pow(a, b); - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, - const Dtype * b, Dtype * c, int n) { - singa::singa_gpu_pow(a, b, c, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Add { - inline static void Map(const Dtype & a, const Dtype & b, Dtype * c) { - *c = a + b; - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, - const Dtype * b, Dtype * c, int n) { -// singa::singa_gpu_add(a, b, c, n); // TODO(haibo) - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Sub { - inline static void Map(const Dtype & a, const Dtype & b, Dtype * c) { - *c = a - b; - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, - const Dtype * b, Dtype * c, int n) { -// singa::singa_gpu_add(a, b, c, n); // TODO(haibo) - } -#endif // USE_GPU -}; - - -template<typename Dtype> -struct Mult { - inline static void Map(const Dtype & a, const Dtype & b, Dtype * c) { - *c = a * b; - } -#ifdef USE_GPU - inline static void CudaMap(const Dtype * a, - const Dtype * b, Dtype * c, int n) { - singa::singa_gpu_mult(a, b, c, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Div { - inline static void Map(const Dtype & a, const Dtype & b, Dtype * c) { - *c = a / b; - } -#ifdef 
USE_GPU - inline static void CudaMap(const Dtype * a, - const Dtype * b, Dtype * c, int n) { - singa::singa_gpu_div(a, b, c, n); - } -#endif // USE_GPU -}; - - -/*********************************************************************/ -template<typename Dtype> -struct Set { - inline static void Map(Dtype alpha, Dtype * a) { - *a = alpha; - } -#ifdef USE_GPU - inline static void CudaMap(Dtype alpha, Dtype * a, int n) { - singa::singa_gpu_set_value(a, alpha, n); - } -#endif // USE_GPU -}; - -template<typename Dtype> -struct Threshold { - inline static void Map(Dtype alpha, const Dtype & a, Dtype * b) { - *b = a < alpha ? 1.0f : 0.0f; - } -#ifdef USE_GPU - inline static void CudaMap(Dtype alpha, const Dtype * a, - Dtype * b, int n) { - singa::singa_gpu_threshold(a, b, alpha, n); - } -#endif // USE_GPU -}; - -}; // namespace op - -}; // namespace singa - -#endif // SINGA_UTILS_SINGA_OP_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/updater.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/updater.h b/include/singa/utils/updater.h deleted file mode 100644 index 33ad8a7..0000000 --- a/include/singa/utils/updater.h +++ /dev/null @@ -1,173 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_UPDATER_H_ -#define SINGA_UTILS_UPDATER_H_ - -#include <string> -#include "singa/proto/job.pb.h" -#include "singa/utils/param.h" -#include "singa/neuralnet/layer.h" - -namespace singa { -using std::string; -/** - * Base learning rate generator. - * - * Generate the learning rate for a given training step/iteration. - * There are many different ways to change the learning rate through time/step. - * Users can inherit this class to implement their own change method. - */ -class LRGenerator { - public: - static LRGenerator* Create(const LRGenProto& proto); - - virtual ~LRGenerator() {} - - virtual void Init(const LRGenProto& proto) { proto_ = proto; } - /** - * @param step training step/iteration.
- * @return base learning rate regardless of step - */ - virtual float Get(int step) { return proto_.base_lr(); } - - protected: - LRGenProto proto_; -}; - -class FixedStepLRGen : public LRGenerator { - public: - float Get(int step) override; - private: - int last_idx_ = 0; -}; - -class StepLRGen : public LRGenerator { - public: - float Get(int step) override; -}; - -class LinearLRGen : public LRGenerator { - public: - float Get(int step) override; -}; - -class ExpLRGen : public LRGenerator { - public: - float Get(int step) override; -}; - -class InvLRGen : public LRGenerator { - public: - float Get(int step) override; -}; - -class InvTLRGen : public LRGenerator { - public: - float Get(int step) override; -}; - -/** - * Updater for Param. - */ -class Updater { - public: - - /* added for python binding */ - static Updater* CreateUpdater(const string str); - /* ------------------------ */ - - static Updater* Create(const UpdaterProto& proto); - - virtual ~Updater() {} - - virtual void Init(const UpdaterProto &proto); - virtual void Update(int step, Param* param, float grad_scale) = 0; - void Clip(const float low, const float high, Param* param); - protected: - UpdaterProto proto_; - LRGenerator* lr_gen_; - float weight_decay_; - float momentum_; - float clip_low_, clip_high_; -}; - -class SGDUpdater : public Updater { - public: - void Update(int step, Param* param, float grad_scale) override; -}; - -class AdaGradUpdater : public Updater { - public: - void Update(int step, Param* param, float grad_scale) override; -}; - - -class NesterovUpdater : public Updater { - public: - void Update(int step, Param* param, float grad_scale) override; -}; - -class RMSPropUpdater : public Updater { - public: - void Init(const UpdaterProto &proto) override; - void Update(int step, Param* param, float grad_scale) override; - - protected: - float rho_; - float delta_; -}; - -class AdaDeltaUpdater : public Updater { - public: - void Init(const UpdaterProto &proto) override; - void Update(int step, Param* param, float grad_scale) override; - - protected: - float rho_; - float delta_; -}; - -class AdamUpdater : public Updater { - public: - void Init(const UpdaterProto &proto) override; - void Update(int step, Param* param, float grad_scale) override; - - protected: - float beta1_; - float beta2_; - float delta_; -}; - -class AdamMaxUpdater : public Updater { - public: - void Init(const UpdaterProto &proto) override; - void Update(int step, Param* param, float grad_scale) override; - - protected: - float beta1_; - float beta2_; - float delta_; -}; - -} // namespace singa - -#endif // SINGA_UTILS_UPDATER_H_
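For readers skimming the removed updater.h: an LRGenerator maps a training step to a learning rate, and an Updater applies a gradient-based rule to a Param, optionally with weight decay, momentum, and clipping. The standalone sketch below illustrates the conventional SGD-with-momentum rule that SGDUpdater is named after; the function names (lr_at_step, sgd_update), the step-decay schedule, and the plain float arrays are illustrative assumptions, not the removed implementation.

#include <cstddef>

// Stand-in for LRGenerator::Get(step): a base rate decayed by `gamma` every
// `change_freq` steps (one common schedule; the removed header does not fix a formula).
float lr_at_step(int step, float base_lr, float gamma, int change_freq) {
  float lr = base_lr;
  for (int s = change_freq; s <= step; s += change_freq) lr *= gamma;
  return lr;
}

// Conventional SGD with momentum and weight decay:
//   history = momentum * history + lr * (grad_scale * grad + weight_decay * data)
//   data   -= history
void sgd_update(int step, float* data, const float* grad, float* history, size_t n,
                float base_lr, float momentum, float weight_decay, float grad_scale) {
  const float lr = lr_at_step(step, base_lr, 0.1f, 100);
  for (size_t i = 0; i < n; ++i) {
    history[i] = momentum * history[i] + lr * (grad_scale * grad[i] + weight_decay * data[i]);
    data[i] -= history[i];
  }
}

In SINGA terms, a caller would hand the updater Param::mutable_cpu_data(), mutable_cpu_grad(), and mutable_cpu_history() for data, grad, and history.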
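param.h above describes slicing large Param objects so that servers receive roughly equal shares (ComputeSlices/SliceParams). The exact policy is not visible from the header, so the following is only a plausible greedy sketch under the assumption that no slice may exceed the average per-server load; ComputeSlicesSketch and param_sizes are hypothetical names.

#include <algorithm>
#include <numeric>
#include <vector>

// Greedy slicing sketch: cut each param into pieces no larger than the average
// per-server load (rounded up), so the slices can be spread evenly across servers.
std::vector<int> ComputeSlicesSketch(int num_servers, const std::vector<int>& param_sizes) {
  long total = std::accumulate(param_sizes.begin(), param_sizes.end(), 0L);
  long cap = (total + num_servers - 1) / num_servers;  // max floats per slice
  std::vector<int> slices;
  for (int size : param_sizes) {
    long left = size;
    while (left > 0) {
      long cut = std::min(left, cap);
      slices.push_back(static_cast<int>(cut));
      left -= cut;
    }
  }
  return slices;
}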
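The ParamTrgt/ParamID/SliceID helpers removed with param.h pack a Param id and a slice id into one int: the param id sits above bit 16 and the slice id in the low 16 bits, so a single integer can name a (param, slice) target in a message. A standalone round-trip check using the same arithmetic:

#include <cassert>

int ParamTrgt(int param_id, int slice_id) { return (param_id << 16) | slice_id; }
int ParamID(int param_trgt) { return param_trgt >> 16; }
int SliceID(int param_trgt) {
  static const int mask = (1 << 16) - 1;
  return param_trgt & mask;
}

int main() {
  int trgt = ParamTrgt(3, 7);   // param 3, slice 7
  assert(ParamID(trgt) == 3);
  assert(SliceID(trgt) == 7);   // both ids must fit in 16 bits for the round trip to hold
  return 0;
}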
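The op:: structs removed with singa_op.h follow one pattern: a static Map that transforms a single element on the CPU, plus a CudaMap that dispatches a GPU kernel over n elements. A CPU-side caller therefore only needs a generic element-wise loop; the MapAll driver below is an assumed example of such a caller, not code from the removed tree (Sigmoid is copied from the header above).

#include <cmath>
#include <cstddef>

// Same shape as the removed op::Sigmoid: one static Map per element.
template <typename Dtype>
struct Sigmoid {
  inline static void Map(const Dtype& a, Dtype* b) { *b = 1.0f / (1.0f + expf(-a)); }
};

// Hypothetical element-wise driver: apply Op::Map to every element of src.
template <typename Op, typename Dtype>
void MapAll(const Dtype* src, Dtype* dst, size_t n) {
  for (size_t i = 0; i < n; ++i) Op::Map(src[i], &dst[i]);
}

// Usage: MapAll<Sigmoid<float>, float>(in, out, n);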
