Github user njayaram2 commented on a diff in the pull request:
https://github.com/apache/madlib/pull/243#discussion_r175947857
--- Diff: src/modules/convex/algo/igd.hpp ---
@@ -90,20 +90,27 @@ IGD<State, ConstState, Task>::transition(state_type
&state,
for (int curr_epoch=0; curr_epoch < n_epochs; curr_epoch++) {
double loss = 0.0;
- for (int curr_batch=0, curr_batch_row_index=0; curr_batch <
n_batches;
- curr_batch++, curr_batch_row_index += batch_size) {
- Matrix X_batch;
- ColumnVector y_batch;
- if (curr_batch == n_batches-1) {
- // last batch
- X_batch =
tuple.indVar.bottomRows(n_rows-curr_batch_row_index);
- y_batch = tuple.depVar.tail(n_rows-curr_batch_row_index);
- } else {
- X_batch = tuple.indVar.block(curr_batch_row_index, 0,
batch_size, n_ind_cols);
- y_batch = tuple.depVar.segment(curr_batch_row_index,
batch_size);
- }
- loss += Task::getLossAndUpdateModel(
- state.task.model, X_batch, y_batch, state.task.stepsize);
+ int random_curr_batch[n_batches];
+ for(int i=0; i<n_batches; i++) {
+ random_curr_batch[i] = i;
+ }
+ int curr_batch_row_index = 0;
+ std::random_shuffle(&random_curr_batch[0],
&random_curr_batch[n_batches]);
--- End diff --
I don't think we should set a seed. Setting a seed will ensure we will
always get the same set of random numbers.
---