theSparta commented on issue #8126: Not able to train a neural network using MXNET with C++ API URL: https://github.com/apache/incubator-mxnet/issues/8126#issuecomment-333847990 I am adding a very simple example here in which I tried to fit a neural network on the XOR function but unable to do so as well. ```C++ #include <iostream> #include <map> #include <string> #include "mxnet-cpp/MxNetCpp.h" // Allow IDE to parse the types #include "../include/mxnet-cpp/op.h" using namespace std; using namespace mxnet::cpp; Symbol mlp(const vector<int> &layers, const vector<Symbol> & weights, const std::vector<Symbol> & biases, const string & inp_name ) { auto x = Symbol::Variable(inp_name); vector<Symbol> outputs(layers.size()); for (size_t i = 0; i < layers.size(); ++i) { string istr = to_string(i); Symbol fc = FullyConnected( i == 0? x : outputs[i-1], // data weights[i], biases[i], layers[i]); outputs[i] = i == layers.size()-1 ? fc : Activation(string("act") + istr, fc, ActivationActType::kTanh); } return outputs.back(); } int main(int argc, char** argv) { const int feature_size = 2; const vector<int> layers{8, 4, 1}; const int batch_size = 4; const int max_epoch = 100000; const float learning_rate = 0.001; const float weight_decay = 1e-2; auto ctx = Context::cpu(); // Use GPU for training auto ctx_cpu = Context::cpu(); vector<Symbol> weights(layers.size()); vector<Symbol> biases(layers.size()); for (size_t i = 0; i < layers.size(); ++i) { string istr = to_string(i); weights[i] = Symbol::Variable("w" + istr); biases[i] = Symbol::Variable("b" + istr); } auto Net = mlp(layers, weights, biases, "X"); auto sym_label = Symbol::Variable("label"); auto output = LogisticRegressionOutput(string("sigmoid"), Net, sym_label); map<string, NDArray> args_map; args_map["X"] = NDArray(Shape(batch_size, feature_size) , ctx); args_map["label"] = NDArray(Shape(batch_size, 1), ctx); auto *exec = output.SimpleBind(ctx, args_map); output.InferArgsMap(ctx, &args_map, args_map); auto arg_names = 
output.ListArguments(); Xavier xavier = Xavier(Xavier::gaussian, Xavier::avg); for (auto &arg : args_map) { xavier(arg.first, &arg.second); } Optimizer* opt = OptimizerRegistry::Find("adam"); opt->SetParam("rescale_grad", 1.0 / batch_size) ->SetParam("lr", learning_rate) ->SetParam("wd", weight_decay); // XOR Function mx_float* aptr_x = new mx_float[batch_size * feature_size]; mx_float* aptr_y = new mx_float[batch_size]; aptr_x[0] = 0.; aptr_x[1] = 0.; aptr_y[0] = 0; aptr_x[2] = 0; aptr_x[3] = 1.; aptr_y[1] = 1; aptr_x[4] = 1.; aptr_x[5] = 0.; aptr_y[2] = 1; aptr_x[6] = 1.; aptr_x[7] = 1.; aptr_y[3] = 0; NDArray train_data = NDArray(Shape(batch_size, 2), ctx_cpu, false); NDArray train_label = NDArray(Shape(batch_size), ctx_cpu, false); train_data.SyncCopyFromCPU(aptr_x, batch_size * 2); train_label.SyncCopyFromCPU(aptr_y, batch_size); train_data.WaitToRead(); train_label.WaitToRead(); Accuracy acu_train; for (int ITER = 0; ITER < max_epoch ; ++ITER) { acu_train.Reset(); args_map["X"] = train_data.Copy(ctx); args_map["label"] = train_label.Copy(ctx); NDArray::WaitAll(); exec->Forward(true); acu_train.Update(args_map["label"], exec->outputs[0]); if(ITER % 5000 == 0){ auto out = (exec->outputs[0]).Copy(ctx_cpu); auto labels = args_map["label"].Copy(ctx_cpu); NDArray::WaitAll(); const mx_float * outs = out.GetData(); auto lbs = labels.GetData(); for (int i = 0 ; i < batch_size ; i++) cout << lbs[i] << ":" << outs[i] << " "; cout << endl; LG << "ITER: " << ITER << " Train Accuracy: " << acu_train.Get(); } exec->Backward(); // Update parameters for (size_t i = 0; i < arg_names.size(); ++i) { if (arg_names[i] == "X" || arg_names[i] == "label") continue; opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); } } delete exec; delete [] aptr_x; delete [] aptr_y; MXNotifyShutdown(); return 0; } ``` The output is (True_label : Predicted_label): ``` 0:0.95178 1:0.880215 1:0.944654 0:0.86154 [19:14:56] xor.cpp:114: ITER: 0 Train Accuracy: 0.5 0:0.786497 1:1 1:0.799124 
0:3.35246e-13 [19:14:57] xor.cpp:114: ITER: 5000 Train Accuracy: 0.5 0:0.786137 1:1 1:0.800972 0:1.01632e-21 [19:14:58] xor.cpp:114: ITER: 10000 Train Accuracy: 0.5 0:0.783514 1:1 1:0.802832 0:3.29902e-30 [19:14:58] xor.cpp:114: ITER: 15000 Train Accuracy: 0.5 0:0.785589 1:1 1:0.805458 0:1.13999e-38 [19:14:59] xor.cpp:114: ITER: 20000 Train Accuracy: 0.5 0:0.785148 1:1 1:0.808582 0:0 [19:15:00] xor.cpp:114: ITER: 25000 Train Accuracy: 0.5 0:0.786545 1:1 1:0.812904 0:0 [19:15:00] xor.cpp:114: ITER: 30000 Train Accuracy: 0.5 0:0.784969 1:1 1:0.8189 0:0 [19:15:01] xor.cpp:114: ITER: 35000 Train Accuracy: 0.5 0:0.784798 1:1 1:0.82841 0:0 [19:15:02] xor.cpp:114: ITER: 40000 Train Accuracy: 0.5 0:0.787042 1:1 1:0.845301 0:0 [19:15:02] xor.cpp:114: ITER: 45000 Train Accuracy: 0.5 0:0.784718 1:1 1:0.879533 0:0 [19:15:03] xor.cpp:114: ITER: 50000 Train Accuracy: 0.5 0:0.783628 1:1 1:0.934087 0:0 [19:15:04] xor.cpp:114: ITER: 55000 Train Accuracy: 0.5 0:0.786908 1:1 1:0.948499 0:0 [19:15:04] xor.cpp:114: ITER: 60000 Train Accuracy: 0.5 0:0.784415 1:1 1:0.948458 0:0 [19:15:05] xor.cpp:114: ITER: 65000 Train Accuracy: 0.5 0:0.784358 1:1 1:0.948456 0:0 [19:15:06] xor.cpp:114: ITER: 70000 Train Accuracy: 0.5 0:0.784338 1:1 1:0.948455 0:0 [19:15:07] xor.cpp:114: ITER: 75000 Train Accuracy: 0.5 0:0.797159 1:1 1:0.948738 0:0 [19:15:07] xor.cpp:114: ITER: 80000 Train Accuracy: 0.5 0:0.784321 1:1 1:0.948455 0:0 [19:15:08] xor.cpp:114: ITER: 85000 Train Accuracy: 0.5 0:0.7837 1:1 1:0.948445 0:0 [19:15:09] xor.cpp:114: ITER: 90000 Train Accuracy: 0.5 0:0.78427 1:1 1:0.948449 0:0 [19:15:09] xor.cpp:114: ITER: 95000 Train Accuracy: 0.5 ``` Even after 10000 iterations, the neural net is not able to predict the XOR function correctly. I have tried different layer sizes (both increasing/decreasing the number of neurons) but it didn't help at all. 
**Is this a bug in the C++ API or is there any problem with my code?** @piiswrong ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
With regards, Apache Git Services