theSparta commented on issue #8126: Not able to train a neural network using 
MXNET with C++ API
URL: 
https://github.com/apache/incubator-mxnet/issues/8126#issuecomment-333847990
 
 
   I am adding a very simple example here in which I tried to fit a neural 
network to the XOR function, but I was unable to train it successfully either.
   ```C++
   #include <iostream>
   #include <map>
   #include <string>
   #include "mxnet-cpp/MxNetCpp.h"
   // Allow IDE to parse the types
   #include "../include/mxnet-cpp/op.h"
   
   using namespace std;
   using namespace mxnet::cpp;
   
   
   // Builds a fully connected multi-layer perceptron symbol.
   //
   //   layers   - output width (num_hidden) of each FullyConnected layer
   //   weights  - one weight Variable per layer
   //   biases   - one bias Variable per layer
   //   inp_name - name of the input data Variable
   //
   // Each hidden layer is followed by a tanh activation named "act<i>";
   // the final layer's raw (pre-activation) output is returned so a loss
   // such as LogisticRegressionOutput can be attached by the caller.
   Symbol mlp(const vector<int> &layers, const vector<Symbol> & weights,
               const std::vector<Symbol> & biases, const string & inp_name )
   {
     Symbol net = Symbol::Variable(inp_name);
   
     const size_t last = layers.size() - 1;
     for (size_t idx = 0; idx < layers.size(); ++idx)
     {
       Symbol fc = FullyConnected(net, weights[idx], biases[idx], layers[idx]);
       if (idx == last)
         net = fc;  // no activation on the output layer
       else
         net = Activation(string("act") + to_string(idx), fc,
                          ActivationActType::kTanh);
     }
   
     return net;
   }
   
   int main(int argc, char** argv)
   {
       const int feature_size = 2;
       const vector<int> layers{8, 4, 1};
       const int batch_size = 4;
       const int max_epoch = 100000;
       const float learning_rate = 0.001;
       const float weight_decay = 1e-2;
   
       auto ctx = Context::cpu(); // Use GPU for training
       auto ctx_cpu = Context::cpu();
   
       vector<Symbol> weights(layers.size());
       vector<Symbol> biases(layers.size());
   
       for (size_t i = 0; i < layers.size(); ++i)
       {
           string istr = to_string(i);
           weights[i] = Symbol::Variable("w" + istr);
           biases[i] = Symbol::Variable("b" + istr);
       }
   
       auto Net = mlp(layers, weights, biases, "X");
       auto sym_label = Symbol::Variable("label");
       auto output = LogisticRegressionOutput(string("sigmoid"), Net, 
sym_label);
   
       map<string, NDArray> args_map;
       args_map["X"] = NDArray(Shape(batch_size, feature_size) , ctx);
       args_map["label"] = NDArray(Shape(batch_size, 1), ctx);
   
       auto *exec = output.SimpleBind(ctx, args_map);
       output.InferArgsMap(ctx, &args_map, args_map);
       auto arg_names = output.ListArguments();
   
       Xavier xavier = Xavier(Xavier::gaussian, Xavier::avg);
       for (auto &arg : args_map)
       {
           xavier(arg.first, &arg.second);
       }
   
       Optimizer* opt = OptimizerRegistry::Find("adam");
       opt->SetParam("rescale_grad", 1.0 / batch_size)
           ->SetParam("lr", learning_rate)
           ->SetParam("wd", weight_decay);
   
       // XOR Function
       mx_float* aptr_x = new mx_float[batch_size * feature_size];
       mx_float* aptr_y = new mx_float[batch_size];
   
       aptr_x[0] = 0.; aptr_x[1] = 0.; aptr_y[0] = 0;
       aptr_x[2] = 0; aptr_x[3] = 1.; aptr_y[1] = 1;
       aptr_x[4] = 1.; aptr_x[5] = 0.; aptr_y[2] = 1;
       aptr_x[6] = 1.; aptr_x[7] = 1.; aptr_y[3] = 0;
   
       NDArray train_data = NDArray(Shape(batch_size, 2), ctx_cpu, false);
       NDArray train_label = NDArray(Shape(batch_size), ctx_cpu, false);
       train_data.SyncCopyFromCPU(aptr_x, batch_size * 2);
       train_label.SyncCopyFromCPU(aptr_y, batch_size);
       train_data.WaitToRead();
       train_label.WaitToRead();
   
       Accuracy acu_train;
       for (int ITER = 0; ITER < max_epoch ; ++ITER)
       {
           acu_train.Reset();
           args_map["X"] = train_data.Copy(ctx);
           args_map["label"] = train_label.Copy(ctx);
           NDArray::WaitAll();
   
           exec->Forward(true);
           acu_train.Update(args_map["label"], exec->outputs[0]);
   
           if(ITER % 5000 == 0){
               auto out = (exec->outputs[0]).Copy(ctx_cpu);
               auto labels = args_map["label"].Copy(ctx_cpu);
               NDArray::WaitAll();
               const mx_float * outs = out.GetData();
               auto lbs = labels.GetData();
               for (int i = 0 ; i < batch_size ; i++)
                   cout << lbs[i] << ":" << outs[i] << " ";
               cout << endl;
               LG << "ITER: " << ITER << " Train Accuracy: " << acu_train.Get();
           }
           exec->Backward();
           // Update parameters
           for (size_t i = 0; i < arg_names.size(); ++i)
           {
               if (arg_names[i] == "X" || arg_names[i] == "label") continue;
               opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
           }
       }
   
       delete exec;
       delete [] aptr_x;
       delete [] aptr_y;
       MXNotifyShutdown();
       return 0;
    }
   ```
   The output is (True_label : Predicted_label):
   ```
   0:0.95178 1:0.880215 1:0.944654 0:0.86154 
   [19:14:56] xor.cpp:114: ITER: 0 Train Accuracy: 0.5
   0:0.786497 1:1 1:0.799124 0:3.35246e-13 
   [19:14:57] xor.cpp:114: ITER: 5000 Train Accuracy: 0.5
   0:0.786137 1:1 1:0.800972 0:1.01632e-21 
   [19:14:58] xor.cpp:114: ITER: 10000 Train Accuracy: 0.5
   0:0.783514 1:1 1:0.802832 0:3.29902e-30 
   [19:14:58] xor.cpp:114: ITER: 15000 Train Accuracy: 0.5
   0:0.785589 1:1 1:0.805458 0:1.13999e-38 
   [19:14:59] xor.cpp:114: ITER: 20000 Train Accuracy: 0.5
   0:0.785148 1:1 1:0.808582 0:0 
   [19:15:00] xor.cpp:114: ITER: 25000 Train Accuracy: 0.5
   0:0.786545 1:1 1:0.812904 0:0 
   [19:15:00] xor.cpp:114: ITER: 30000 Train Accuracy: 0.5
   0:0.784969 1:1 1:0.8189 0:0 
   [19:15:01] xor.cpp:114: ITER: 35000 Train Accuracy: 0.5
   0:0.784798 1:1 1:0.82841 0:0 
   [19:15:02] xor.cpp:114: ITER: 40000 Train Accuracy: 0.5
   0:0.787042 1:1 1:0.845301 0:0 
   [19:15:02] xor.cpp:114: ITER: 45000 Train Accuracy: 0.5
   0:0.784718 1:1 1:0.879533 0:0 
   [19:15:03] xor.cpp:114: ITER: 50000 Train Accuracy: 0.5
   0:0.783628 1:1 1:0.934087 0:0 
   [19:15:04] xor.cpp:114: ITER: 55000 Train Accuracy: 0.5
   0:0.786908 1:1 1:0.948499 0:0 
   [19:15:04] xor.cpp:114: ITER: 60000 Train Accuracy: 0.5
   0:0.784415 1:1 1:0.948458 0:0 
   [19:15:05] xor.cpp:114: ITER: 65000 Train Accuracy: 0.5
   0:0.784358 1:1 1:0.948456 0:0 
   [19:15:06] xor.cpp:114: ITER: 70000 Train Accuracy: 0.5
   0:0.784338 1:1 1:0.948455 0:0 
   [19:15:07] xor.cpp:114: ITER: 75000 Train Accuracy: 0.5
   0:0.797159 1:1 1:0.948738 0:0 
   [19:15:07] xor.cpp:114: ITER: 80000 Train Accuracy: 0.5
   0:0.784321 1:1 1:0.948455 0:0 
   [19:15:08] xor.cpp:114: ITER: 85000 Train Accuracy: 0.5
   0:0.7837 1:1 1:0.948445 0:0 
   [19:15:09] xor.cpp:114: ITER: 90000 Train Accuracy: 0.5
   0:0.78427 1:1 1:0.948449 0:0 
   [19:15:09] xor.cpp:114: ITER: 95000 Train Accuracy: 0.5
   ```
   Even after 100,000 iterations, the neural net is not able to predict the XOR 
function correctly. I have tried different layer sizes (both 
increasing and decreasing the number of neurons), but that didn't help at all. 
   **Is this a bug in the C++ API, or is there a problem with my code?** 
@piiswrong 
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to