madlib git commit: SVM: Add minibatch as a new solver [Forced Update!]
Repository: madlib Updated Branches: refs/heads/master 657cf4aa4 -> a8bbe082c (forced update) SVM: Add minibatch as a new solver This work is based on the original work by Xiaocheng Tang in #75. This PR adds two main features: - A Minibatch solver that takes as input a batch of data - SVM code that takes advantage of the minibatch Closes #229 Co-authored-by: Nikhil Kak Co-authored-by: Xiaocheng Tang Project: http://git-wip-us.apache.org/repos/asf/madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/a8bbe082 Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/a8bbe082 Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/a8bbe082 Branch: refs/heads/master Commit: a8bbe082ca60f87e006eba164ea69b159e1875fc Parents: 53db736 Author: Rahul Iyer Authored: Fri Feb 2 14:15:01 2018 -0800 Committer: Rahul Iyer Committed: Fri Feb 2 16:46:48 2018 -0800 -- .gitignore | 1 + src/dbal/EigenIntegration/HandleMap_proto.hpp | 3 +- src/modules/convex/algo/igd.hpp | 81 - src/modules/convex/linear_svm_igd.cpp | 159 + src/modules/convex/linear_svm_igd.hpp | 6 + src/modules/convex/task/linear_svm.hpp | 67 +++- src/modules/convex/type/model.hpp | 71 +--- src/modules/convex/type/state.hpp | 79 + src/modules/convex/type/tuple.hpp | 3 + src/ports/postgres/modules/svm/svm.py_in| 170 +-- src/ports/postgres/modules/svm/svm.sql_in | 117 +++-- src/ports/postgres/modules/svm/test/svm.sql_in | 104 .../modules/utilities/validate_args.py_in | 19 +-- 13 files changed, 774 insertions(+), 106 deletions(-) -- http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/.gitignore -- diff --git a/.gitignore b/.gitignore index 00dc016..a073fbd 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ auto *.swp *.fdb_latexmk *.swo # vim swap file +\#*\# # emacs backup file # Biblatex temporary files *-blx.bib http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/dbal/EigenIntegration/HandleMap_proto.hpp -- diff --git a/src/dbal/EigenIntegration/HandleMap_proto.hpp b/src/dbal/EigenIntegration/HandleMap_proto.hpp index 4bfe7c5..c7b6cb2 100644 --- a/src/dbal/EigenIntegration/HandleMap_proto.hpp +++ b/src/dbal/EigenIntegration/HandleMap_proto.hpp @@ -19,6 +19,7 @@ namespace eigen_integration { template class HandleMap : public Eigen::Map { public: +typedef EigenType PlainEigenType; typedef Eigen::Map Base; typedef typename Base::Scalar Scalar; typedef typename Base::Index Index; @@ -57,7 +58,7 @@ public: * * For example, this allows construction of MappedColumnVector from * MappedMatrix::col(int) or NativeColumnVector, etc. - */ + */ template HandleMap(const Eigen::MapBase& inMappedData, typename boost::enable_if_c::type* = 0) http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/algo/igd.hpp -- diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp index cd17e64..3ae4c13 100644 --- a/src/modules/convex/algo/igd.hpp +++ b/src/modules/convex/algo/igd.hpp @@ -34,7 +34,9 @@ public: typedef typename Task::model_type model_type; static void transition(state_type &state, const tuple_type &tuple); +static void transitionInMiniBatch(state_type &state, const tuple_type &tuple); static void merge(state_type &state, const_state_type &otherState); +static void mergeInPlace(state_type &state, const_state_type &otherState); static void final(state_type &state); }; @@ -56,6 +58,62 @@ IGD::transition(state_type &state, state.task.stepsize * tuple.weight); } +/** + * @brief Update the transition state in mini-batches + * + * Note: We assume that + * 1. Task defines a model_eigen_type + * 2. A batch of tuple.indVar is a Matrix + * 3. A batch of tuple.depVar is a ColumnVector + * 4. Task defines a getLossAndUpdateModel method + * + */ + template + void + IGD::transitionInMiniBatch( +state_type &state, +const tuple_type &tuple) { + +madlib_assert(tuple.indVar.rows() == tuple.depVar.rows(), + std::runtime_error("Invalid data. Independent and dependent " + "batches don't have same number of rows.")); + +int batch_size = state.algo.batchSize; +int n_epochs = state.algo.nEpochs; + +// n_rows/n_ind_cols are the rows/cols in a transition tuple. +int n_r
madlib git commit: SVM: Add minibatch as a new solver
Repository: madlib Updated Branches: refs/heads/master 53db7363a -> 657cf4aa4 SVM: Add minibatch as a new solver This work is based on the original work by Xiaocheng Tang in #75. This PR adds two main features: - A Minibatch solver that takes as input a batch of data - SVM code that takes advantage of the minibatch Closes #229 Co-authored by: Nikhil Kak Co-authored by: Xiaocheng Tang Project: http://git-wip-us.apache.org/repos/asf/madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/657cf4aa Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/657cf4aa Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/657cf4aa Branch: refs/heads/master Commit: 657cf4aa4823731353aaa47db47b9a9b241edaaf Parents: 53db736 Author: Rahul Iyer Authored: Fri Feb 2 14:15:01 2018 -0800 Committer: Rahul Iyer Committed: Fri Feb 2 16:40:49 2018 -0800 -- .gitignore | 1 + src/dbal/EigenIntegration/HandleMap_proto.hpp | 3 +- src/modules/convex/algo/igd.hpp | 81 - src/modules/convex/linear_svm_igd.cpp | 159 + src/modules/convex/linear_svm_igd.hpp | 6 + src/modules/convex/task/linear_svm.hpp | 67 +++- src/modules/convex/type/model.hpp | 71 +--- src/modules/convex/type/state.hpp | 79 + src/modules/convex/type/tuple.hpp | 3 + src/ports/postgres/modules/svm/svm.py_in| 170 +-- src/ports/postgres/modules/svm/svm.sql_in | 117 +++-- src/ports/postgres/modules/svm/test/svm.sql_in | 104 .../modules/utilities/validate_args.py_in | 19 +-- 13 files changed, 774 insertions(+), 106 deletions(-) -- http://git-wip-us.apache.org/repos/asf/madlib/blob/657cf4aa/.gitignore -- diff --git a/.gitignore b/.gitignore index 00dc016..a073fbd 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ auto *.swp *.fdb_latexmk *.swo # vim swap file +\#*\# # emacs backup file # Biblatex temporary files *-blx.bib http://git-wip-us.apache.org/repos/asf/madlib/blob/657cf4aa/src/dbal/EigenIntegration/HandleMap_proto.hpp -- diff --git a/src/dbal/EigenIntegration/HandleMap_proto.hpp b/src/dbal/EigenIntegration/HandleMap_proto.hpp index 4bfe7c5..c7b6cb2 100644 --- a/src/dbal/EigenIntegration/HandleMap_proto.hpp +++ b/src/dbal/EigenIntegration/HandleMap_proto.hpp @@ -19,6 +19,7 @@ namespace eigen_integration { template class HandleMap : public Eigen::Map { public: +typedef EigenType PlainEigenType; typedef Eigen::Map Base; typedef typename Base::Scalar Scalar; typedef typename Base::Index Index; @@ -57,7 +58,7 @@ public: * * For example, this allows construction of MappedColumnVector from * MappedMatrix::col(int) or NativeColumnVector, etc. - */ + */ template HandleMap(const Eigen::MapBase& inMappedData, typename boost::enable_if_c::type* = 0) http://git-wip-us.apache.org/repos/asf/madlib/blob/657cf4aa/src/modules/convex/algo/igd.hpp -- diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp index cd17e64..3ae4c13 100644 --- a/src/modules/convex/algo/igd.hpp +++ b/src/modules/convex/algo/igd.hpp @@ -34,7 +34,9 @@ public: typedef typename Task::model_type model_type; static void transition(state_type &state, const tuple_type &tuple); +static void transitionInMiniBatch(state_type &state, const tuple_type &tuple); static void merge(state_type &state, const_state_type &otherState); +static void mergeInPlace(state_type &state, const_state_type &otherState); static void final(state_type &state); }; @@ -56,6 +58,62 @@ IGD::transition(state_type &state, state.task.stepsize * tuple.weight); } +/** + * @brief Update the transition state in mini-batches + * + * Note: We assume that + * 1. Task defines a model_eigen_type + * 2. A batch of tuple.indVar is a Matrix + * 3. A batch of tuple.depVar is a ColumnVector + * 4. Task defines a getLossAndUpdateModel method + * + */ + template + void + IGD::transitionInMiniBatch( +state_type &state, +const tuple_type &tuple) { + +madlib_assert(tuple.indVar.rows() == tuple.depVar.rows(), + std::runtime_error("Invalid data. Independent and dependent " + "batches don't have same number of rows.")); + +int batch_size = state.algo.batchSize; +int n_epochs = state.algo.nEpochs; + +// n_rows/n_ind_cols are the rows/cols in a transition tuple. +int n_rows = tuple.indV