Github user kaknikhil commented on a diff in the pull request:
https://github.com/apache/madlib/pull/243#discussion_r175877947
--- Diff: src/modules/convex/task/mlp.hpp ---
@@ -111,6 +117,57 @@ class MLP {
template <class Model, class Tuple>
double MLP<Model, Tuple>::lambda = 0;
+template <class Model, class Tuple>
+double
+MLP<Model, Tuple>::getLossAndUpdateModel(
+        model_type &model,
+        const Matrix &x_batch,
+        const Matrix &y_true_batch,
+        const double &stepsize) {
+
+    uint16_t N = model.u.size(); // assuming number of layers >= 1
+    size_t n = x_batch.rows();
+    size_t i, k;
+    double total_loss = 0.;
+
+    // gradient accumulated over the batch
+    std::vector<Matrix> total_gradient_per_layer(N);
+    for (k = 0; k < N; ++k)
+        total_gradient_per_layer[k] = Matrix::Zero(model.u[k].rows(),
+                                                   model.u[k].cols());
+
+    for (i = 0; i < n; i++) {
+        ColumnVector x = x_batch.row(i);
+        ColumnVector y_true = y_true_batch.row(i);
+
+        std::vector<ColumnVector> net, o, delta;
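+        // feedForward fills the per-layer pre-activations (net) and
+        // activations (o); backPropogate fills the error terms (delta)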
+        feedForward(model, x, net, o);
+        backPropogate(y_true, o.back(), net, model, delta);
+
+        for (k = 0; k < N; k++) {
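+            // the outer product of layer k's input activations and its
+            // back-propagated deltas is the gradient w.r.t. its weights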
+            total_gradient_per_layer[k] += o[k] * delta[k].transpose();
+        }
+
+        // loss computation
+        ColumnVector y_estimated = o.back();
+        if (model.is_classification) {
+            double clip = 1.e-10;
+            y_estimated = y_estimated.cwiseMax(clip).cwiseMin(1. - clip);
--- End diff --
Just curious, why do we need to recalculate `y_estimated`?
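
My guess is that the clipping keeps the subsequent cross-entropy `log()` away from exactly 0 and 1 when an output unit saturates. A minimal standalone sketch of the failure mode (hypothetical values, not MADlib code):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
    const double clip = 1.e-10;
    const double y_true = 1.0;
    const double y_est = 0.0;  // a fully saturated output unit

    // without clipping: log(0) = -inf, so the loss blows up to inf
    double unclipped = -y_true * std::log(y_est);
    // with clipping: the loss stays finite (-log(1e-10) ~= 23.03)
    double clipped = -y_true * std::log(std::max(y_est, clip));

    std::printf("unclipped: %f, clipped: %f\n", unclipped, clipped);
    return 0;
}
```

If that's the reason, a short code comment here would make it obvious.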
---