madlib git commit: SVM: Add minibatch as a new solver [Forced Update!]

2018-02-02 Thread riyer
Repository: madlib
Updated Branches:
  refs/heads/master 657cf4aa4 -> a8bbe082c (forced update)


SVM: Add minibatch as a new solver

This work is based on the original work by
Xiaocheng Tang in #75.

This PR adds two main features:

- A Minibatch solver that takes as input a batch of data
- SVM code that takes advantage of the minibatch

Closes #229

Co-authored-by: Nikhil Kak 
Co-authored-by: Xiaocheng Tang 


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/a8bbe082
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/a8bbe082
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/a8bbe082

Branch: refs/heads/master
Commit: a8bbe082ca60f87e006eba164ea69b159e1875fc
Parents: 53db736
Author: Rahul Iyer 
Authored: Fri Feb 2 14:15:01 2018 -0800
Committer: Rahul Iyer 
Committed: Fri Feb 2 16:46:48 2018 -0800

--
 .gitignore  |   1 +
 src/dbal/EigenIntegration/HandleMap_proto.hpp   |   3 +-
 src/modules/convex/algo/igd.hpp |  81 -
 src/modules/convex/linear_svm_igd.cpp   | 159 +
 src/modules/convex/linear_svm_igd.hpp   |   6 +
 src/modules/convex/task/linear_svm.hpp  |  67 +++-
 src/modules/convex/type/model.hpp   |  71 +---
 src/modules/convex/type/state.hpp   |  79 +
 src/modules/convex/type/tuple.hpp   |   3 +
 src/ports/postgres/modules/svm/svm.py_in| 170 +--
 src/ports/postgres/modules/svm/svm.sql_in   | 117 +++--
 src/ports/postgres/modules/svm/test/svm.sql_in  | 104 
 .../modules/utilities/validate_args.py_in   |  19 +--
 13 files changed, 774 insertions(+), 106 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/.gitignore
--
diff --git a/.gitignore b/.gitignore
index 00dc016..a073fbd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ auto
 *.swp
 *.fdb_latexmk
 *.swo  # vim swap file
+\#*\#  # emacs backup file
 
 # Biblatex temporary files
 *-blx.bib

http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/dbal/EigenIntegration/HandleMap_proto.hpp
--
diff --git a/src/dbal/EigenIntegration/HandleMap_proto.hpp b/src/dbal/EigenIntegration/HandleMap_proto.hpp
index 4bfe7c5..c7b6cb2 100644
--- a/src/dbal/EigenIntegration/HandleMap_proto.hpp
+++ b/src/dbal/EigenIntegration/HandleMap_proto.hpp
@@ -19,6 +19,7 @@ namespace eigen_integration {
 template 
 class HandleMap : public Eigen::Map {
 public:
+typedef EigenType PlainEigenType;
 typedef Eigen::Map Base;
 typedef typename Base::Scalar Scalar;
 typedef typename Base::Index Index;
@@ -57,7 +58,7 @@ public:
  *
  * For example, this allows construction of MappedColumnVector from
  * MappedMatrix::col(int) or NativeColumnVector, etc.
- */ 
+ */
 template 
 HandleMap(const Eigen::MapBase& inMappedData,
 typename boost::enable_if_c::type* = 0)

http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/algo/igd.hpp
--
diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp
index cd17e64..3ae4c13 100644
--- a/src/modules/convex/algo/igd.hpp
+++ b/src/modules/convex/algo/igd.hpp
@@ -34,7 +34,9 @@ public:
 typedef typename Task::model_type model_type;
 
 static void transition(state_type , const tuple_type );
+static void transitionInMiniBatch(state_type , const tuple_type );
 static void merge(state_type , const_state_type );
+static void mergeInPlace(state_type , const_state_type );
 static void final(state_type );
 };
 
@@ -56,6 +58,62 @@ IGD::transition(state_type ,
 state.task.stepsize * tuple.weight);
 }
 
+/**
+  * @brief Update the transition state in mini-batches
+  *
+  * Note: We assume that
+  * 1. Task defines a model_eigen_type
+  * 2. A batch of tuple.indVar is a Matrix
+  * 3. A batch of tuple.depVar is a ColumnVector
+  * 4. Task defines a getLossAndUpdateModel method
+  *
+ */
+ template 
+ void
+ IGD::transitionInMiniBatch(
+state_type ,
+const tuple_type ) {
+
+madlib_assert(tuple.indVar.rows() == tuple.depVar.rows(),
+  std::runtime_error("Invalid data. Independent and dependent "
+ "batches don't have same number of rows."));
+
+int batch_size = state.algo.batchSize;
+

madlib git commit: SVM: Add minibatch as a new solver

2018-02-02 Thread riyer
Repository: madlib
Updated Branches:
  refs/heads/master 53db7363a -> 657cf4aa4


SVM: Add minibatch as a new solver

This work is based on the original work by
Xiaocheng Tang in #75.

This PR adds two main features:

- A Minibatch solver that takes as input a batch of data
- SVM code that takes advantage of the minibatch

Closes #229

Co-authored by: Nikhil Kak 
Co-authored by: Xiaocheng Tang 


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/657cf4aa
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/657cf4aa
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/657cf4aa

Branch: refs/heads/master
Commit: 657cf4aa4823731353aaa47db47b9a9b241edaaf
Parents: 53db736
Author: Rahul Iyer 
Authored: Fri Feb 2 14:15:01 2018 -0800
Committer: Rahul Iyer 
Committed: Fri Feb 2 16:40:49 2018 -0800

--
 .gitignore  |   1 +
 src/dbal/EigenIntegration/HandleMap_proto.hpp   |   3 +-
 src/modules/convex/algo/igd.hpp |  81 -
 src/modules/convex/linear_svm_igd.cpp   | 159 +
 src/modules/convex/linear_svm_igd.hpp   |   6 +
 src/modules/convex/task/linear_svm.hpp  |  67 +++-
 src/modules/convex/type/model.hpp   |  71 +---
 src/modules/convex/type/state.hpp   |  79 +
 src/modules/convex/type/tuple.hpp   |   3 +
 src/ports/postgres/modules/svm/svm.py_in| 170 +--
 src/ports/postgres/modules/svm/svm.sql_in   | 117 +++--
 src/ports/postgres/modules/svm/test/svm.sql_in  | 104 
 .../modules/utilities/validate_args.py_in   |  19 +--
 13 files changed, 774 insertions(+), 106 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/madlib/blob/657cf4aa/.gitignore
--
diff --git a/.gitignore b/.gitignore
index 00dc016..a073fbd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ auto
 *.swp
 *.fdb_latexmk
 *.swo  # vim swap file
+\#*\#  # emacs backup file
 
 # Biblatex temporary files
 *-blx.bib

http://git-wip-us.apache.org/repos/asf/madlib/blob/657cf4aa/src/dbal/EigenIntegration/HandleMap_proto.hpp
--
diff --git a/src/dbal/EigenIntegration/HandleMap_proto.hpp b/src/dbal/EigenIntegration/HandleMap_proto.hpp
index 4bfe7c5..c7b6cb2 100644
--- a/src/dbal/EigenIntegration/HandleMap_proto.hpp
+++ b/src/dbal/EigenIntegration/HandleMap_proto.hpp
@@ -19,6 +19,7 @@ namespace eigen_integration {
 template 
 class HandleMap : public Eigen::Map {
 public:
+typedef EigenType PlainEigenType;
 typedef Eigen::Map Base;
 typedef typename Base::Scalar Scalar;
 typedef typename Base::Index Index;
@@ -57,7 +58,7 @@ public:
  *
  * For example, this allows construction of MappedColumnVector from
  * MappedMatrix::col(int) or NativeColumnVector, etc.
- */ 
+ */
 template 
 HandleMap(const Eigen::MapBase& inMappedData,
 typename boost::enable_if_c::type* = 0)

http://git-wip-us.apache.org/repos/asf/madlib/blob/657cf4aa/src/modules/convex/algo/igd.hpp
--
diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp
index cd17e64..3ae4c13 100644
--- a/src/modules/convex/algo/igd.hpp
+++ b/src/modules/convex/algo/igd.hpp
@@ -34,7 +34,9 @@ public:
 typedef typename Task::model_type model_type;
 
 static void transition(state_type , const tuple_type );
+static void transitionInMiniBatch(state_type , const tuple_type );
 static void merge(state_type , const_state_type );
+static void mergeInPlace(state_type , const_state_type );
 static void final(state_type );
 };
 
@@ -56,6 +58,62 @@ IGD::transition(state_type ,
 state.task.stepsize * tuple.weight);
 }
 
+/**
+  * @brief Update the transition state in mini-batches
+  *
+  * Note: We assume that
+  * 1. Task defines a model_eigen_type
+  * 2. A batch of tuple.indVar is a Matrix
+  * 3. A batch of tuple.depVar is a ColumnVector
+  * 4. Task defines a getLossAndUpdateModel method
+  *
+ */
+ template 
+ void
+ IGD::transitionInMiniBatch(
+state_type ,
+const tuple_type ) {
+
+madlib_assert(tuple.indVar.rows() == tuple.depVar.rows(),
+  std::runtime_error("Invalid data. Independent and dependent "
+ "batches don't have same number of rows."));
+
+int batch_size = state.algo.batchSize;
+int n_epochs =