Repository: madlib
Updated Branches:
  refs/heads/master 0e6469255 -> bb4b54205


Build: Add support for GCC 5.0+

JIRA: MADLIB-1025

GCC 5.1 release libstdc++ introduced a new library ABI that includes new
implementations of std::string and std::list. We disable this ABI to
ensure MADlib compiles with non-gcc compilers as well.

Further, using -O3 optimization (default in CMAKE_BUILD_TYPE=Release)
led to runtime errors. We revert to O2 optimization level (default in
CMAKE_BUILD_TYPE=RelWithDebInfo) to avoid this issue.
The RCA for this problem is yet unknown.

Co-authored-by: Nandish Jayaram <njaya...@apache.org>
Co-authored-by: Nikhil Kak <n...@pivotal.io>
Co-authored-by: Orhan Kislal <okis...@pivotal.io>


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/bb4b5420
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/bb4b5420
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/bb4b5420

Branch: refs/heads/master
Commit: bb4b54205930cdf6d39ec4ea4ebe99b2522a1789
Parents: 0e64692
Author: Rahul Iyer <ri...@apache.org>
Authored: Thu May 17 15:38:50 2018 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Thu May 17 15:46:25 2018 -0700

----------------------------------------------------------------------
 CMakeLists.txt                                  | 14 ++++++-
 src/modules/convex/algo/igd.hpp                 | 41 ++++++++++----------
 src/modules/convex/mlp_igd.cpp                  | 20 +++++-----
 src/modules/convex/task/mlp.hpp                 | 38 ++++++++----------
 src/modules/convex/type/model.hpp               | 15 ++++---
 src/modules/convex/type/state.hpp               |  7 +++-
 src/modules/recursive_partitioning/DT_impl.hpp  |  5 +--
 src/modules/recursive_partitioning/DT_proto.hpp |  1 -
 .../recursive_partitioning/decision_tree.cpp    | 15 ++++---
 9 files changed, 82 insertions(+), 74 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cc6bf72..9193ac7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,9 +69,14 @@ if(CMAKE_COMPILER_IS_GNUCC)
     # 
http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_02_01_01
     # We specify that we are POSIX.1-2001 compliant and XSI-conforming. We only
     # need to specify _XOPEN_SOURCE as _POSIX_C_SOURCE will be set implicitly.
-    set(CMAKE_C_FLAGS "-std=c99 -pedantic -Wall -Wextra -Wno-clobbered 
-D_XOPEN_SOURCE=600"
+    set(CMAKE_C_FLAGS "-std=c99 -D_GLIBCXX_USE_CXX11_ABI=0 -pedantic -Wall 
-Wextra -Wno-clobbered -D_XOPEN_SOURCE=600"
         CACHE STRING
         "Flags used by the compiler during all build types." FORCE)
+    if((CMAKE_C_COMPILER_VERSION VERSION_EQUAL 5.0) OR 
(CMAKE_C_COMPILER_VERSION VERSION_GREATER 5.0))
+        # Versions 5+ fail with the "Release" build type i.e. when optimization
+        # level is -O3 or greater.
+        add_compile_options("-O2")
+    endif()
 
     # See comments below for C++:
     # Weird enough, the following property is set only for C++ but not for C
@@ -93,10 +98,15 @@ if(CMAKE_COMPILER_IS_GNUCXX)
     # - long long is not part of the C++ 1998/2003 standard, but it is such a
     # common (and useful) extension that we do not want to hear warnings about
     # it.
-    set(CMAKE_CXX_FLAGS "-std=gnu++98 -fdiagnostics-show-option -Wall -Wextra 
-pedantic -Wconversion -Wno-long-long -Wno-clobbered -mno-sse2 
-fstrict-aliasing"
+    set(CMAKE_CXX_FLAGS "-std=gnu++98 -D_GLIBCXX_USE_CXX11_ABI=0 
-fdiagnostics-show-option -Wall -Wextra -pedantic -Wconversion -Wno-long-long 
-Wno-clobbered -mno-sse2 -fstrict-aliasing"
         CACHE STRING
         "Flags used by the compiler during all build types." FORCE)
 
+    if((CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 5.0) OR 
(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0))
+        # Versions 5+ fail with the "Release" build type i.e. when optimization
+        # level is -O3 or greater.
+        add_compile_options("-O2")
+    endif()
     # We want to include some header files as system header files in order to
     # disable warnings. However, on Mac OS X, a CMake variable is not set
     # correctly on Mac OS X. http://www.cmake.org/Bug/view.php?id=10837

http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/src/modules/convex/algo/igd.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp
index f5ec471..738553f 100644
--- a/src/modules/convex/algo/igd.hpp
+++ b/src/modules/convex/algo/igd.hpp
@@ -107,16 +107,15 @@ IGD<State, ConstState, Task>::merge(state_type &state,
                   std::runtime_error("Invalid data. Independent and dependent "
                                      "batches don't have same number of 
rows."));
 
-    int batch_size = state.batchSize;
-    int n_epochs = state.nEpochs;
+    uint16_t batch_size = state.batchSize;
+    uint16_t n_epochs = state.nEpochs;
 
     // n_rows/n_ind_cols are the rows/cols in a transition tuple.
-    int n_rows = tuple.indVar.rows();
-    int n_ind_cols = tuple.indVar.cols();
-    int n_batches = n_rows < batch_size ? 1 :
-                    n_rows / batch_size +
-                    int(n_rows%batch_size > 0);
+    Index n_rows = tuple.indVar.rows();
+    size_t n_batches = n_rows < batch_size ? 1 :
+                        size_t(n_rows / batch_size) + size_t(n_rows % 
batch_size > 0);
 
+    double max_loss = 0.0;
     for (int curr_epoch=0; curr_epoch < n_epochs; curr_epoch++) {
         double loss = 0.0;
         /*
@@ -126,33 +125,35 @@ IGD<State, ConstState, Task>::merge(state_type &state,
             a buffer. Note that this still does not randomize rows within
             a batch.
         */
-        int random_curr_batch[n_batches];
-        for(int i=0; i<n_batches; i++) {
+        std::vector<size_t> random_curr_batch(n_batches, 0);
+        for(size_t i=0; i < n_batches; i++) {
             random_curr_batch[i] = i;
         }
-        int curr_batch_row_index = 0;
         std::random_shuffle(&random_curr_batch[0], 
&random_curr_batch[n_batches]);
-        for (int i=0; i < n_batches; i++) {
-            int curr_batch = random_curr_batch[i];
-            int curr_batch_row_index = curr_batch * batch_size;
+
+        for (size_t i = 0; i < n_batches; i++) {
+            size_t curr_batch = random_curr_batch[i];
+            Index curr_batch_row_index = static_cast<Index>(curr_batch * 
batch_size);
             Matrix X_batch;
             Matrix Y_batch;
             if (curr_batch == n_batches-1) {
                // last batch
-               X_batch = tuple.indVar.bottomRows(n_rows-curr_batch_row_index);
-               Y_batch = tuple.depVar.bottomRows(n_rows-curr_batch_row_index);
+               X_batch = tuple.indVar.bottomRows(n_rows - 
curr_batch_row_index);
+               Y_batch = tuple.depVar.bottomRows(n_rows - 
curr_batch_row_index);
             } else {
-                X_batch = tuple.indVar.block(curr_batch_row_index, 0, 
batch_size, n_ind_cols);
-                Y_batch = tuple.depVar.block(curr_batch_row_index, 0, 
batch_size, tuple.depVar.cols());
+                X_batch = tuple.indVar.block(curr_batch_row_index, 0,
+                                             batch_size, tuple.indVar.cols());
+                Y_batch = tuple.depVar.block(curr_batch_row_index, 0,
+                                             batch_size, tuple.depVar.cols());
             }
             loss += Task::getLossAndUpdateModel(
                 state.model, X_batch, Y_batch, state.stepsize);
         }
 
-        // The first epoch will most likely have the highest loss.
-        // Being pessimistic, use the total loss only from the first epoch.
-        if (curr_epoch==0) state.loss += loss;
+        if (max_loss < loss) max_loss = loss;
     }
+    // Be pessimistic and report the maximum loss
+    state.loss += max_loss;
     return;
  }
 

http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/src/modules/convex/mlp_igd.cpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/mlp_igd.cpp b/src/modules/convex/mlp_igd.cpp
index 8cd0baa..63f278f 100644
--- a/src/modules/convex/mlp_igd.cpp
+++ b/src/modules/convex/mlp_igd.cpp
@@ -81,7 +81,7 @@ mlp_igd_transition::run(AnyType &args) {
             // configuration parameters and initialization
             // this is run only once (first iteration, first tuple)
             ArrayHandle<double> numbersOfUnits = 
args[4].getAs<ArrayHandle<double> >();
-            int numberOfStages = numbersOfUnits.size() - 1;
+            uint16_t numberOfStages = 
static_cast<uint16_t>(numbersOfUnits.size() - 1);
 
             state.allocate(*this, numberOfStages,
                            reinterpret_cast<const double 
*>(numbersOfUnits.ptr()));
@@ -100,8 +100,8 @@ mlp_igd_transition::run(AnyType &args) {
                 // state.reset() ensures algo.incrModel is copied from 
task.model
                 Index layer_start = 0;
                 for (size_t k = 0; k < numberOfStages; ++k){
-                    for (size_t j=0; j < state.task.model.u[k].cols(); ++j){
-                        for (size_t i=0; i < state.task.model.u[k].rows(); 
++i){
+                    for (Index j=0; j < state.task.model.u[k].cols(); ++j){
+                        for (Index i=0; i < state.task.model.u[k].rows(); ++i){
                             state.task.model.u[k](i, j) = warm_start_coeff(
                                 layer_start + j * state.task.model.u[k].rows() 
+ i);
                         }
@@ -204,9 +204,7 @@ mlp_minibatch_transition::run(AnyType &args) {
         } else {
             // configuration parameters
             ArrayHandle<double> numbersOfUnits = 
args[4].getAs<ArrayHandle<double> >();
-            int numberOfStages = numbersOfUnits.size() - 1;
-
-
+            uint16_t numberOfStages = 
static_cast<uint16_t>(numbersOfUnits.size() - 1);
             state.allocate(*this, numberOfStages,
                            reinterpret_cast<const double 
*>(numbersOfUnits.ptr()));
             state.stepsize = args[5].getAs<double>();
@@ -218,8 +216,8 @@ mlp_minibatch_transition::run(AnyType &args) {
                 MappedColumnVector warm_start_coeff = 
args[9].getAs<MappedColumnVector>();
                 Index layer_start = 0;
                 for (size_t k = 0; k < numberOfStages; ++k){
-                    for (size_t j=0; j < state.model.u[k].cols(); ++j){
-                        for (size_t i=0; i < state.model.u[k].rows(); ++i){
+                    for (Index j=0; j < state.model.u[k].cols(); ++j){
+                        for (Index i=0; i < state.model.u[k].rows(); ++i){
                             state.model.u[k](i, j) = warm_start_coeff(
                                 layer_start + j * state.model.u[k].rows() + i);
                         }
@@ -235,8 +233,8 @@ mlp_minibatch_transition::run(AnyType &args) {
 
             state.lambda = args[10].getAs<double>();
             MLPTask::lambda = state.lambda;
-            state.batchSize = args[11].getAs<int>();
-            state.nEpochs = args[12].getAs<int>();
+            state.batchSize = static_cast<uint16_t>(args[11].getAs<int>());
+            state.nEpochs = static_cast<uint16_t>(args[12].getAs<int>());
         }
         // resetting in either case
         state.reset();
@@ -375,7 +373,7 @@ internal_predict_mlp::run(AnyType &args) {
     MappedColumnVector coeff = args[0].getAs<MappedColumnVector>();
     MappedColumnVector layerSizes = args[4].getAs<MappedColumnVector>();
     // Input layer doesn't count
-    size_t numberOfStages = layerSizes.size()-1;
+    uint16_t numberOfStages = static_cast<uint16_t>(layerSizes.size() - 1);
     double is_classification = args[2].getAs<double>();
     double activation = args[3].getAs<double>();
     int is_dep_var_array_for_classification = args[8].getAs<int>();

http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/src/modules/convex/task/mlp.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/task/mlp.hpp b/src/modules/convex/task/mlp.hpp
index adf87a7..a13aa84 100644
--- a/src/modules/convex/task/mlp.hpp
+++ b/src/modules/convex/task/mlp.hpp
@@ -126,18 +126,17 @@ MLP<Model, Tuple>::getLossAndUpdateModel(
         const Matrix         &y_true_batch,
         const double         &stepsize) {
 
-    uint16_t num_layers = model.u.size(); // assuming nu. of layers >= 1
-    size_t num_rows_in_batch = x_batch.rows();
-    size_t i, k;
+    uint16_t num_layers = static_cast<uint16_t>(model.u.size()); // assuming 
nu. of layers >= 1
+    Index num_rows_in_batch = x_batch.rows();
     double total_loss = 0.;
 
     // gradient added over the batch
     std::vector<Matrix> total_gradient_per_layer(num_layers);
-    for (k=0; k < num_layers; ++k)
+    for (Index k=0; k < num_layers; ++k)
         total_gradient_per_layer[k] = Matrix::Zero(model.u[k].rows(),
                                                    model.u[k].cols());
 
-    for (i=0; i < num_rows_in_batch; i++){
+    for (Index i=0; i < num_rows_in_batch; i++){
         ColumnVector x = x_batch.row(i);
         ColumnVector y_true = y_true_batch.row(i);
 
@@ -145,7 +144,7 @@ MLP<Model, Tuple>::getLossAndUpdateModel(
         feedForward(model, x, net, o);
         backPropogate(y_true, o.back(), net, model, delta);
 
-        for (k=0; k < num_layers; k++){
+        for (Index k=0; k < num_layers; k++){
                 total_gradient_per_layer[k] += o[k] * delta[k].transpose();
         }
 
@@ -161,12 +160,13 @@ MLP<Model, Tuple>::getLossAndUpdateModel(
             total_loss += 0.5 * (y_estimated - y_true).squaredNorm();
         }
     }
-    for (k=0; k < num_layers; k++){
+    for (Index k=0; k < num_layers; k++){
         Matrix regularization = MLP<Model, Tuple>::lambda * model.u[k];
         regularization.row(0).setZero(); // Do not update bias
-        model.u[k] -= stepsize * (total_gradient_per_layer[k] / \
-                                  num_rows_in_batch + \
-                                  regularization);
+        model.u[k] -=
+            stepsize *
+            (total_gradient_per_layer[k] / 
static_cast<double>(num_rows_in_batch) +
+             regularization);
     }
     return total_loss;
 }
@@ -178,14 +178,13 @@ MLP<Model, Tuple>::gradientInPlace(
         const independent_variables_type    &x,
         const dependent_variable_type       &y_true,
         const double                        &stepsize) {
-    uint16_t N = model.u.size(); // assuming nu. of layers >= 1
-    uint16_t k;
+    size_t N = model.u.size(); // assuming nu. of layers >= 1
     std::vector<ColumnVector> net, o, delta;
 
     feedForward(model, x, net, o);
     backPropogate(y_true, o.back(), net, model, delta);
 
-    for (k=0; k < N; k++){
+    for (size_t k=0; k < N; k++){
         Matrix regularization = MLP<Model, Tuple>::lambda*model.u[k];
         regularization.row(0).setZero(); // Do not update bias
         model.u[k] -= stepsize * (o[k] * delta[k].transpose() + 
regularization);
@@ -252,13 +251,11 @@ MLP<Model, Tuple>::feedForward(
         const independent_variables_type    &x,
         std::vector<ColumnVector>           &net,
         std::vector<ColumnVector>           &o){
-    uint16_t k, N;
-    /*
-        The network starts with the 0th layer (input), followed by n_layers
+    /* The network starts with the 0th layer (input), followed by n_layers
         number of hidden layers, and then an output layer.
     */
     // Total number of coefficients in the model
-    N = model.u.size(); // assuming >= 1
+    size_t N = model.u.size(); // assuming >= 1
     net.resize(N + 1);
     // o[k] is a vector of the output of the kth layer
     o.resize(N + 1);
@@ -274,7 +271,7 @@ MLP<Model, Tuple>::feedForward(
     o[0].resize(x.size() + 1);
     o[0] << 1.,x;
 
-    for (k = 1; k < N; k ++) {
+    for (size_t k = 1; k < N; k ++) {
         // o_k = activation(sum(o_{k-1} * u_{k-1}))
         // net_k just does the inner sum: input to the activation function
         net[k] = model.u[k-1].transpose() * o[k-1];
@@ -300,8 +297,7 @@ MLP<Model, Tuple>::backPropogate(
         const std::vector<ColumnVector>     &net,
         const model_type                    &model,
         std::vector<ColumnVector>           &delta) {
-    uint16_t k, N;
-    N = model.u.size(); // assuming >= 1
+    size_t N = model.u.size(); // assuming >= 1
     delta.resize(N);
 
     double (*activationDerivative)(const double&);
@@ -313,7 +309,7 @@ MLP<Model, Tuple>::backPropogate(
         activationDerivative = &tanhDerivative;
 
     delta.back() = y_estimated - y_true;
-    for (k = N - 1; k >= 1; k --) {
+    for (size_t k = N - 1; k >= 1; k --) {
         // Do not include the bias terms
         delta[k-1] = model.u[k].bottomRows(model.u[k].rows()-1) * delta[k];
         delta[k-1] = delta[k-1].array() * 
net[k].unaryExpr(activationDerivative).array();

http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/src/modules/convex/type/model.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/model.hpp 
b/src/modules/convex/type/model.hpp
index d77e562..11013e5 100644
--- a/src/modules/convex/type/model.hpp
+++ b/src/modules/convex/type/model.hpp
@@ -124,12 +124,12 @@ struct MLPModel {
         const double *n = inNumbersOfUnits;
         size_t k;
         for (k = 0; k < N; k ++) {
-            size += (n[k] + 1) * (n[k+1]);
+            size += static_cast<uint32_t>((n[k] + 1) * (n[k+1]));
         }
         return size;     // weights (u)
     }
 
-    uint32_t rebind(const double *is_classification_in,
+    size_t rebind(const double *is_classification_in,
                     const double *activation_in,
                     const double *data,
                     const uint16_t &inNumberOfStages,
@@ -141,15 +141,14 @@ struct MLPModel {
         is_classification.rebind(is_classification_in);
         activation.rebind(activation_in);
 
-        uint32_t sizeOfU = 0;
+        size_t sizeOfU = 0;
         u.clear();
         for (k = 0; k < N; k ++) {
-            // u.push_back(Eigen::Map<Matrix >(
-            //     const_cast<double*>(data + sizeOfU),
-            //     n[k] + 1, n[k+1]));
             u.push_back(MutableMappedMatrix());
-            u[k].rebind(const_cast<double *>(data + sizeOfU), n[k] + 1, 
n[k+1]);
-            sizeOfU += (n[k] + 1) * (n[k+1]);
+            u[k].rebind(const_cast<double *>(data + sizeOfU),
+                        static_cast<Index>(n[k] + 1),
+                        static_cast<Index>(n[k+1]));
+            sizeOfU += static_cast<size_t>((n[k] + 1) * (n[k+1]));
         }
 
         return sizeOfU;

http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/src/modules/convex/type/state.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/state.hpp 
b/src/modules/convex/type/state.hpp
index aaf0015..6bd5854 100644
--- a/src/modules/convex/type/state.hpp
+++ b/src/modules/convex/type/state.hpp
@@ -669,8 +669,11 @@ private:
             reinterpret_cast<dimension_pointer_type>(&mStorage[1]);
         task.stepsize.rebind(&mStorage[N + 2]);
         task.lambda.rebind(&mStorage[N + 3]);
-        uint32_t sizeOfModel = task.model.rebind(&mStorage[N + 4],&mStorage[N 
+ 5],&mStorage[N + 6],
-                task.numberOfStages, task.numbersOfUnits);
+        size_t sizeOfModel = task.model.rebind(&mStorage[N + 4],
+                                                 &mStorage[N + 5],
+                                                 &mStorage[N + 6],
+                                                 task.numberOfStages,
+                                                 task.numbersOfUnits);
 
         algo.incrModel.rebind(&mStorage[N + 4],&mStorage[N + 5],&mStorage[N + 
6 + sizeOfModel],
                 task.numberOfStages, task.numbersOfUnits);

http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/src/modules/recursive_partitioning/DT_impl.hpp
----------------------------------------------------------------------
diff --git a/src/modules/recursive_partitioning/DT_impl.hpp 
b/src/modules/recursive_partitioning/DT_impl.hpp
index 4327ade..69d0817 100644
--- a/src/modules/recursive_partitioning/DT_impl.hpp
+++ b/src/modules/recursive_partitioning/DT_impl.hpp
@@ -552,7 +552,7 @@ DecisionTree<Container>::expand(const Accumulator &state,
             uint64_t total_count = statCount(predictions.row(current));
             if (max_impurity_gain > 0 &&
                     shouldSplit(total_count, true_count, false_count,
-                                min_split, min_bucket, sps, max_depth)) {
+                                min_split, min_bucket, max_depth)) {
 
                 double max_threshold;
                 if (max_is_cat)
@@ -864,7 +864,7 @@ DecisionTree<Container>::expand_by_sampling(const 
Accumulator &state,
 
             if (max_impurity_gain > 0 &&
                     shouldSplit(total_count, true_count, false_count,
-                                min_split, min_bucket, sps, max_depth)) {
+                                min_split, min_bucket, max_depth)) {
 
                 double max_threshold;
                 if (max_is_cat)
@@ -1062,7 +1062,6 @@ DecisionTree<Container>::shouldSplit(const uint64_t 
&total_count,
                                      const uint64_t &false_count,
                                      const uint16_t &min_split,
                                      const uint16_t &min_bucket,
-                                     const uint16_t &stats_per_split,
                                      const uint16_t &max_depth) const {
     // total_count != true_count + false_count if there are rows with NULL 
values
 

http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/src/modules/recursive_partitioning/DT_proto.hpp
----------------------------------------------------------------------
diff --git a/src/modules/recursive_partitioning/DT_proto.hpp 
b/src/modules/recursive_partitioning/DT_proto.hpp
index 33446a1..1852f69 100644
--- a/src/modules/recursive_partitioning/DT_proto.hpp
+++ b/src/modules/recursive_partitioning/DT_proto.hpp
@@ -101,7 +101,6 @@ public:
                      const uint64_t &false_count,
                      const uint16_t &min_split,
                      const uint16_t &min_bucket,
-                     const uint16_t &stats_per_split,
                      const uint16_t &max_depth) const;
 
     template <class Accumulator>

http://git-wip-us.apache.org/repos/asf/madlib/blob/bb4b5420/src/modules/recursive_partitioning/decision_tree.cpp
----------------------------------------------------------------------
diff --git a/src/modules/recursive_partitioning/decision_tree.cpp 
b/src/modules/recursive_partitioning/decision_tree.cpp
index b85923a..5579cb5 100644
--- a/src/modules/recursive_partitioning/decision_tree.cpp
+++ b/src/modules/recursive_partitioning/decision_tree.cpp
@@ -160,12 +160,14 @@ compute_leaf_stats_transition::run(AnyType & args){
         // tree to the index in the actual tree.
         ColumnVector leaf_feature_indices =
             dt.feature_indices.tail(dt.feature_indices.size()/2 + 
1).cast<double>();
-        ColumnVector leaf_node_lookup(leaf_feature_indices.size());
-        size_t n_leaves_not_finished = 0;
+        IntegerVector leaf_node_lookup(leaf_feature_indices.size());
+
+        uint32_t n_leaves_not_finished = 0;
         for (Index i=0; i < leaf_feature_indices.size(); i++){
             if ((leaf_feature_indices(i) != dt.NODE_NON_EXISTING) &&
                     (leaf_feature_indices(i) != dt.FINISHED_LEAF)){
-                leaf_node_lookup(i) = n_leaves_not_finished++;  // increment 
after assigning
+                leaf_node_lookup(i) = static_cast<int>(n_leaves_not_finished);
+                n_leaves_not_finished++;
             }
             else{
                 leaf_node_lookup(i) = -1;
@@ -321,11 +323,12 @@ compute_surr_stats_transition::run(AnyType & args){
             ColumnVector final_internal_feature_indices =
                 dt.feature_indices.segment(dt.feature_indices.size()/4,
                                            dt.feature_indices.size()/4 + 
1).cast<double>();
-            ColumnVector index_lookup(final_internal_feature_indices.size());
-            Index n_internal_nodes_reachable = 0;
+            IntegerVector index_lookup(final_internal_feature_indices.size());
+            uint32_t n_internal_nodes_reachable = 0;
             for (Index i=0; i < final_internal_feature_indices.size(); i++){
                 if (final_internal_feature_indices(i) >= 0){
-                    index_lookup(i) = n_internal_nodes_reachable++;  // 
increment after assigning
+                    index_lookup(i) = 
static_cast<int>(n_internal_nodes_reachable);
+                    n_internal_nodes_reachable++;
                 }
                 else{
                     index_lookup(i) = -1;

Reply via email to