chenjunweii opened a new issue #8108: [C++] Some Variable Name cause no Gradient
URL: https://github.com/apache/incubator-mxnet/issues/8108
 
 
   I just encountered a strange problem when using the cpp-package. I found that my 
network could not be trained properly, so I dug into the code and debugged it step by 
step. I found that some weights get no gradient after "Backward": the gradient just 
stays at its initial value.
   
   ```
   // A single shape, written as a list of dimension sizes (e.g. {64, 784} in main).
   typedef vector <mx_uint> mx_shape;
   
   // A list of shapes; main passes three of these to InferShape as output arguments.
   typedef vector <mx_shape> vmx_shape;
   
   
   // Minimal reproduction for the "no gradient" report: builds a single
   // FullyConnected layer wrapped in MakeLoss, binds it on GPU 0, then runs
   // 10 Forward/Backward/Update iterations, printing every argument's
   // gradient array and the first output on each iteration.
   int main(){
        
           Context ctx(DeviceType::kGPU, 0);
   
        // `name` is used three ways below: as the weight Variable's name, as the
        // first argument of FullyConnected, and as the key into the nd/grad maps.
        // NOTE(review): the weight variable and the FullyConnected call share
        // this same string -- possibly related to the reported behaviour; confirm.
        // NOTE(review): binding a string literal to a non-const `char *` is not
        // allowed since C++11; `const char *` would be the conforming spelling.
        char * name = "wfx1";  // weight name
   
        Symbol inputs = Symbol::Variable("inputs");
        
        Symbol weight = Symbol::Variable(name);
   
        Symbol bias = Symbol::Variable("bfc1");
   
        // The statement below is wrapped across two lines by the mail archive.
        Symbol fc = MakeLoss("loss", FullyConnected(name, inputs, weight, bias, 
1));
   
        map <string, mx_shape> input_shape;
   
        input_shape["inputs"] = {64, 784};
   
        // arg/out/aux receive the InferShape results but are not read again in
        // this snippet (`out` is shadowed by a local inside the loop below).
        vmx_shape arg, out, aux;
   
        fc.InferShape(input_shape, &arg, &aux, &out);
        
        // Argument arrays (nd) and their gradient arrays (grad), keyed by name.
        // NOTE(review): these are allocated as Shape(1, 30) / Shape(1), which
        // does not match the {64, 784} given to InferShape above -- confirm
        // which shape was intended.
        map <string, NDArray> nd, grad;
        
        nd["inputs"] = NDArray(Shape(1, 30), ctx);
   
        nd[name] = NDArray(Shape(1, 30), ctx);
   
        nd["bfc1"] = NDArray(Shape(1), ctx);
   
        grad["inputs"] = NDArray(Shape(1, 30), ctx);
   
        grad[name] = NDArray(Shape(1, 30), ctx);
   
        grad["bfc1"] = NDArray(Shape(1), ctx);
        
        Xavier xavier;
   
        Zero zero;
   
        // Apply the Xavier initializer to the input and weight arrays.
        xavier("inputs", &nd["inputs"]);
   
        xavier(name, &nd[name]);
                
        // Sentinel value: if the weight gradient still prints as 1.999 after
        // Backward, it was never written.
        grad[name] = 1.999; // initialize the grad to 1.999
   
        // Apply the Zero initializer to the bias and its gradient; the name
        // passed to the initializer here is "b", not "bfc1".
        zero("b", &nd["bfc1"]);
        
        zero("b", &grad["bfc1"]);
   
        // Argument names, used only to label the gradients printed below.
        // NOTE(review): assumes ListArguments() order matches exe->arg_arrays
        // and exe->grad_arrays -- confirm.
        vector <string> node = fc.ListArguments();
   
        // The Executor pointer is never deleted in this snippet.
        Executor *exe = fc.SimpleBind(ctx, nd, grad);
                
        // No optimizer parameters (e.g. learning rate) are set in this snippet.
        Optimizer * optimizer = OptimizerRegistry::Find("sgd");
   
        for (int i = 0; i != 10; ++i){
   
                (*exe).Forward(true);
   
                (*exe).Backward();
                        
                // Update every argument with its gradient, then print that gradient.
                // The two statements below are each wrapped across two lines by
                // the mail archive.
                for (int j = 0; j != (*exe).arg_arrays.size(); j++){
   
                        (*optimizer).Update(j, (*exe).arg_arrays[j], 
(*exe).grad_arrays[j]);
                        
                        cout << node[j] << " : " << (*exe).grad_arrays[j] << 
endl;
   
                }
                
                
                // Copy of the executor outputs; shadows the outer `out` (vmx_shape).
                vector <NDArray> out = (*exe).outputs;
   
                cout << "Exe : " << out[0] << endl;
                
        }
   }
   
   ```
   
   
   
   When the Variable name is set to wfx1, the gradient does not change after "exe->Backward":
   
   (There are other names that cause the same problem, such as wfc1, wgc, fgc, ...)
   
   wfx1 : [1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 
1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 
1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, 1.999, ]
   
   But when the Variable name is set to wfx12, the gradient is computed properly:
   
   wfx12 : [0.158775, -0.262494, 0.0047116, -0.128555, 1.13857, -0.0230267, 
0.063212, -0.0625605, 0.00763228, 0.175106, 0.536074, -0.254418, 0.228844, 
-0.354217, -0.689576, 0.705991, 0.0421484, -0.58815, 0.102496, 0.432797, 
-0.028993, -0.329697, -0.534513, -0.401094, -0.0141996, -0.355742, 0.647178, 
-0.0944227, 0.136721, 0.325094, ]
   
   
   Any ideas? Thanks!
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to