This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 2fe7aa4  Fix bug in symbolic RNN (#7282)
2fe7aa4 is described below

commit 2fe7aa4189941ec5d1673025336417248c22e38a
Author: Xu Dong <[email protected]>
AuthorDate: Fri Aug 4 04:04:52 2017 +0800

    Fix bug in symbolic RNN (#7282)
    
    * Remove forget_bias in ConvLSTM
    * Remove the hard code about conv_layout
    * Add interface for initializer
    * Remove repetitive code in __call__ function
---
 python/mxnet/rnn/rnn_cell.py      | 195 ++++++++++++++++++--------------------
 tests/python/unittest/test_rnn.py |   2 +-
 2 files changed, 92 insertions(+), 105 deletions(-)

diff --git a/python/mxnet/rnn/rnn_cell.py b/python/mxnet/rnn/rnn_cell.py
index 99d0e8a..c8213a2 100644
--- a/python/mxnet/rnn/rnn_cell.py
+++ b/python/mxnet/rnn/rnn_cell.py
@@ -1072,41 +1072,14 @@ class BidirectionalCell(BaseRNNCell):
 
 
 class BaseConvRNNCell(BaseRNNCell):
-    """Abstract base class for Convolutional RNN cells
-
-    Parameters
-    ----------
-    input_shape : tuple of int
-        Shape of input in single timestep.
-    num_hidden : int
-        Number of units in output symbol.
-    h2h_kernel : tuple of int
-        Kernel of Convolution operator in state-to-state transitions.
-    h2h_dilate : tuple of int
-        Dilation of Convolution operator in state-to-state transitions.
-    i2h_kernel : tuple of int
-        Kernel of Convolution operator in input-to-state transitions.
-    i2h_stride : tuple of int
-        Stride of Convolution operator in input-to-state transitions.
-    i2h_pad : tuple of int
-        Pad of Convolution operator in input-to-state transitions.
-    i2h_dilate : tuple of int
-        Dilation of Convolution operator in input-to-state transitions.
-    activation : str or Symbol,
-        Type of activation function.
-    prefix : str, default ''
-        Prefix for name of layers (and name of weight if params is None).
-    params : RNNParams, default None
-        Container for weight sharing between cells. Created if None.
-    conv_layout : str, , default 'NCHW'
-        Layout of ConvolutionOp
-    """
+    """Abstract base class for Convolutional RNN cells"""
     def __init__(self, input_shape, num_hidden,
                  h2h_kernel, h2h_dilate,
                  i2h_kernel, i2h_stride,
                  i2h_pad, i2h_dilate,
-                 activation,
-                 prefix='', params=None, conv_layout='NCHW'):
+                 i2h_weight_initializer, h2h_weight_initializer,
+                 i2h_bias_initializer, h2h_bias_initializer,
+                 activation, prefix='', params=None, conv_layout='NCHW'):
         super(BaseConvRNNCell, self).__init__(prefix=prefix, params=params)
         # Convolution setting
         self._h2h_kernel = h2h_kernel
@@ -1137,11 +1110,46 @@ class BaseConvRNNCell(BaseRNNCell):
         self._state_shape = 
self._state_shape.infer_shape(data=input_shape)[1][0]
         self._state_shape = (0, ) + self._state_shape[1:]
 
+        # Get params
+        self._iW = self.params.get('i2h_weight', init=i2h_weight_initializer)
+        self._hW = self.params.get('h2h_weight', init=h2h_weight_initializer)
+        self._iB = self.params.get('i2h_bias', init=i2h_bias_initializer)
+        self._hB = self.params.get('h2h_bias', init=h2h_bias_initializer)
+
+    @property
+    def _num_gates(self):
+        return len(self._gate_names)
+
     @property
     def state_info(self):
         return [{'shape': self._state_shape, '__layout__': self._conv_layout},
                 {'shape': self._state_shape, '__layout__': self._conv_layout}]
 
+    def _conv_forward(self, inputs, states, name):
+
+        i2h = symbol.Convolution(name='%si2h'%name,
+                                 data=inputs,
+                                 num_filter=self._num_hidden*self._num_gates,
+                                 kernel=self._i2h_kernel,
+                                 stride=self._i2h_stride,
+                                 pad=self._i2h_pad,
+                                 dilate=self._i2h_dilate,
+                                 weight=self._iW,
+                                 bias=self._iB,
+                                 layout=self._conv_layout)
+
+        h2h = symbol.Convolution(name='%sh2h'%name,
+                                 data=states[0],
+                                 num_filter=self._num_hidden*self._num_gates,
+                                 kernel=self._h2h_kernel,
+                                 dilate=self._h2h_dilate,
+                                 pad=self._h2h_pad,
+                                 stride=(1, 1),
+                                 weight=self._hW,
+                                 bias=self._hB,
+                                 layout=self._conv_layout)
+        return i2h, h2h
+
     def __call__(self, inputs, states):
         raise NotImplementedError("BaseConvRNNCell is abstract class for 
convolutional RNN")
 
@@ -1166,6 +1174,16 @@ class ConvRNNCell(BaseConvRNNCell):
         Pad of Convolution operator in input-to-state transitions.
     i2h_dilate : tuple of int, default (1, 1)
         Dilation of Convolution operator in input-to-state transitions.
+    i2h_weight_initializer : str or Initializer
+        Initializer for the input weights matrix, used for the convolution
+        transformation of the inputs.
+    h2h_weight_initializer : str or Initializer
+        Initializer for the recurrent weights matrix, used for the convolution
+        transformation of the recurrent state.
+    i2h_bias_initializer : str or Initializer, default zeros
+        Initializer for the bias vector.
+    h2h_bias_initializer : str or Initializer, default zeros
+        Initializer for the bias vector.
     activation : str or Symbol,
         default functools.partial(symbol.LeakyReLU, act_type='leaky', 
slope=0.2)
         Type of activation function.
@@ -1180,19 +1198,20 @@ class ConvRNNCell(BaseConvRNNCell):
                  h2h_kernel=(3, 3), h2h_dilate=(1, 1),
                  i2h_kernel=(3, 3), i2h_stride=(1, 1),
                  i2h_pad=(1, 1), i2h_dilate=(1, 1),
+                 i2h_weight_initializer=None, h2h_weight_initializer=None,
+                 i2h_bias_initializer='zeros', h2h_bias_initializer='zeros',
                  activation=functools.partial(symbol.LeakyReLU, 
act_type='leaky', slope=0.2),
                  prefix='ConvRNN_', params=None, conv_layout='NCHW'):
         super(ConvRNNCell, self).__init__(input_shape=input_shape, 
num_hidden=num_hidden,
                                           h2h_kernel=h2h_kernel, 
h2h_dilate=h2h_dilate,
                                           i2h_kernel=i2h_kernel, 
i2h_stride=i2h_stride,
                                           i2h_pad=i2h_pad, 
i2h_dilate=i2h_dilate,
+                                          
i2h_weight_initializer=i2h_weight_initializer,
+                                          
h2h_weight_initializer=h2h_weight_initializer,
+                                          
i2h_bias_initializer=i2h_bias_initializer,
+                                          
h2h_bias_initializer=h2h_bias_initializer,
                                           activation=activation, prefix=prefix,
                                           params=params, 
conv_layout=conv_layout)
-        # Get params
-        self._iW = self.params.get('i2h_weight')
-        self._hW = self.params.get('h2h_weight')
-        self._iB = self.params.get('i2h_bias')
-        self._hB = self.params.get('h2h_bias')
 
     @property
     def _gate_names(self):
@@ -1201,24 +1220,7 @@ class ConvRNNCell(BaseConvRNNCell):
     def __call__(self, inputs, states):
         self._counter += 1
         name = '%st%d_'%(self._prefix, self._counter)
-        i2h = symbol.Convolution(name='%si2h'%name,
-                                 data=inputs,
-                                 num_filter=self._num_hidden,
-                                 kernel=self._i2h_kernel,
-                                 stride=self._i2h_stride,
-                                 pad=self._i2h_pad,
-                                 dilate=self._i2h_dilate,
-                                 weight=self._iW,
-                                 bias=self._iB,)
-        h2h = symbol.Convolution(name='%sh2h'%name,
-                                 data=states[0],
-                                 num_filter=self._num_hidden,
-                                 kernel=self._h2h_kernel,
-                                 dilate=self._h2h_dilate,
-                                 pad=self._h2h_pad,
-                                 stride=(1, 1),
-                                 weight=self._hW,
-                                 bias=self._hB)
+        i2h, h2h = self._conv_forward(inputs, states, name)
         output = self._get_activation(i2h + h2h, self._activation,
                                       name='%sout'%name)
         return output, [output]
@@ -1248,6 +1250,16 @@ class ConvLSTMCell(BaseConvRNNCell):
         Pad of Convolution operator in input-to-state transitions.
     i2h_dilate : tuple of int, default (1, 1)
         Dilation of Convolution operator in input-to-state transitions.
+    i2h_weight_initializer : str or Initializer
+        Initializer for the input weights matrix, used for the convolution
+        transformation of the inputs.
+    h2h_weight_initializer : str or Initializer
+        Initializer for the recurrent weights matrix, used for the convolution
+        transformation of the recurrent state.
+    i2h_bias_initializer : str or Initializer, default zeros
+        Initializer for the bias vector.
+    h2h_bias_initializer : str or Initializer, default zeros
+        Initializer for the bias vector.
     activation : str or Symbol
         default functools.partial(symbol.LeakyReLU, act_type='leaky', 
slope=0.2)
         Type of activation function.
@@ -1255,8 +1267,6 @@ class ConvLSTMCell(BaseConvRNNCell):
         Prefix for name of layers (and name of weight if params is None).
     params : RNNParams, default None
         Container for weight sharing between cells. Created if None.
-    forget_bias : bias added to forget gate, default 1.0.
-        Jozefowicz et al. 2015 recommends setting this to 1.0
     conv_layout : str, , default 'NCHW'
         Layout of ConvolutionOp
     """
@@ -1264,23 +1274,22 @@ class ConvLSTMCell(BaseConvRNNCell):
                  h2h_kernel=(3, 3), h2h_dilate=(1, 1),
                  i2h_kernel=(3, 3), i2h_stride=(1, 1),
                  i2h_pad=(1, 1), i2h_dilate=(1, 1),
+                 i2h_weight_initializer=None, h2h_weight_initializer=None,
+                 i2h_bias_initializer='zeros', h2h_bias_initializer='zeros',
                  activation=functools.partial(symbol.LeakyReLU, 
act_type='leaky', slope=0.2),
-                 prefix='ConvLSTM_', params=None, forget_bias=1.0,
+                 prefix='ConvLSTM_', params=None,
                  conv_layout='NCHW'):
         super(ConvLSTMCell, self).__init__(input_shape=input_shape, 
num_hidden=num_hidden,
                                            h2h_kernel=h2h_kernel, 
h2h_dilate=h2h_dilate,
                                            i2h_kernel=i2h_kernel, 
i2h_stride=i2h_stride,
                                            i2h_pad=i2h_pad, 
i2h_dilate=i2h_dilate,
+                                           
i2h_weight_initializer=i2h_weight_initializer,
+                                           
h2h_weight_initializer=h2h_weight_initializer,
+                                           
i2h_bias_initializer=i2h_bias_initializer,
+                                           
h2h_bias_initializer=h2h_bias_initializer,
                                            activation=activation, 
prefix=prefix,
                                            params=params, 
conv_layout=conv_layout)
 
-        # Get params
-        self._iW = self.params.get('i2h_weight')
-        self._hW = self.params.get('h2h_weight')
-        # we add the forget_bias to i2h_bias, this adds the bias to the forget 
gate activation
-        self._iB = self.params.get('i2h_bias', 
init=init.LSTMBias(forget_bias=forget_bias))
-        self._hB = self.params.get('h2h_bias')
-
     @property
     def _gate_names(self):
         return ['_i', '_f', '_c', '_o']
@@ -1288,25 +1297,7 @@ class ConvLSTMCell(BaseConvRNNCell):
     def __call__(self, inputs, states):
         self._counter += 1
         name = '%st%d_'%(self._prefix, self._counter)
-        i2h = symbol.Convolution(name='%si2h'%name,
-                                 data=inputs,
-                                 num_filter=self._num_hidden*4,
-                                 kernel=self._i2h_kernel,
-                                 stride=self._i2h_stride,
-                                 pad=self._i2h_pad,
-                                 dilate=self._i2h_dilate,
-                                 weight=self._iW,
-                                 bias=self._iB,)
-        h2h = symbol.Convolution(name='%sh2h'%name,
-                                 data=states[0],
-                                 num_filter=self._num_hidden*4,
-                                 kernel=self._h2h_kernel,
-                                 dilate=self._h2h_dilate,
-                                 pad=self._h2h_pad,
-                                 stride=(1, 1),
-                                 weight=self._hW,
-                                 bias=self._hB)
-
+        i2h, h2h = self._conv_forward(inputs, states, name)
         gates = i2h + h2h
         slice_gates = symbol.SliceChannel(gates, num_outputs=4, 
axis=self._conv_layout.find('C'),
                                           name="%sslice"%name)
@@ -1346,6 +1337,16 @@ class ConvGRUCell(BaseConvRNNCell):
         Pad of Convolution operator in input-to-state transitions.
     i2h_dilate : tuple of int, default (1, 1)
         Dilation of Convolution operator in input-to-state transitions.
+    i2h_weight_initializer : str or Initializer
+        Initializer for the input weights matrix, used for the convolution
+        transformation of the inputs.
+    h2h_weight_initializer : str or Initializer
+        Initializer for the recurrent weights matrix, used for the convolution
+        transformation of the recurrent state.
+    i2h_bias_initializer : str or Initializer, default zeros
+        Initializer for the bias vector.
+    h2h_bias_initializer : str or Initializer, default zeros
+        Initializer for the bias vector.
     activation : str or Symbol,
         default functools.partial(symbol.LeakyReLU, act_type='leaky', 
slope=0.2)
         Type of activation function.
@@ -1360,19 +1361,20 @@ class ConvGRUCell(BaseConvRNNCell):
                  h2h_kernel=(3, 3), h2h_dilate=(1, 1),
                  i2h_kernel=(3, 3), i2h_stride=(1, 1),
                  i2h_pad=(1, 1), i2h_dilate=(1, 1),
+                 i2h_weight_initializer=None, h2h_weight_initializer=None,
+                 i2h_bias_initializer='zeros', h2h_bias_initializer='zeros',
                  activation=functools.partial(symbol.LeakyReLU, 
act_type='leaky', slope=0.2),
                  prefix='ConvGRU_', params=None, conv_layout='NCHW'):
         super(ConvGRUCell, self).__init__(input_shape=input_shape, 
num_hidden=num_hidden,
                                           h2h_kernel=h2h_kernel, 
h2h_dilate=h2h_dilate,
                                           i2h_kernel=i2h_kernel, 
i2h_stride=i2h_stride,
                                           i2h_pad=i2h_pad, 
i2h_dilate=i2h_dilate,
+                                          
i2h_weight_initializer=i2h_weight_initializer,
+                                          
h2h_weight_initializer=h2h_weight_initializer,
+                                          
i2h_bias_initializer=i2h_bias_initializer,
+                                          
h2h_bias_initializer=h2h_bias_initializer,
                                           activation=activation, prefix=prefix,
                                           params=params, 
conv_layout=conv_layout)
-        # Get params
-        self._iW = self.params.get('i2h_weight')
-        self._hW = self.params.get('h2h_weight')
-        self._iB = self.params.get('i2h_bias')
-        self._hB = self.params.get('h2h_bias')
 
     @property
     def _gate_names(self):
@@ -1382,22 +1384,7 @@ class ConvGRUCell(BaseConvRNNCell):
         self._counter += 1
         seq_idx = self._counter
         name = '%st%d_' % (self._prefix, seq_idx)
-        i2h = symbol.Convolution(name='%s_i2h'%name, data=inputs,
-                                 num_filter=self._num_hidden * 3,
-                                 kernel=self._i2h_kernel,
-                                 stride=self._i2h_stride,
-                                 pad=self._i2h_pad,
-                                 dilate=self._i2h_dilate,
-                                 weight=self._iW,
-                                 bias=self._iB,)
-        h2h = symbol.Convolution(name='%s_h2h'%name, data=states[0],
-                                 num_filter=self._num_hidden * 3,
-                                 kernel=self._h2h_kernel,
-                                 dilate=self._h2h_dilate,
-                                 pad=self._h2h_pad,
-                                 stride=(1, 1),
-                                 weight=self._hW,
-                                 bias=self._hB)
+        i2h, h2h = self._conv_forward(inputs, states, name)
 
         i2h_r, i2h_z, i2h = symbol.SliceChannel(i2h, num_outputs=3, 
name="%s_i2h_slice" % name)
         h2h_r, h2h_z, h2h = symbol.SliceChannel(h2h, num_outputs=3, 
name="%s_h2h_slice" % name)
diff --git a/tests/python/unittest/test_rnn.py 
b/tests/python/unittest/test_rnn.py
index 75f41fe..e8176bb 100644
--- a/tests/python/unittest/test_rnn.py
+++ b/tests/python/unittest/test_rnn.py
@@ -254,7 +254,7 @@ def test_convlstm():
                                h2h_kernel=(3, 3), h2h_dilate=(1, 1),
                                i2h_kernel=(3, 3), i2h_stride=(1, 1),
                                i2h_pad=(1, 1), i2h_dilate=(1, 1),
-                               prefix='rnn_', forget_bias=1.0)
+                               prefix='rnn_')
     inputs = [mx.sym.Variable('rnn_t%d_data'%i) for i in range(3)]
     outputs, _ = cell.unroll(3, inputs)
     outputs = mx.sym.Group(outputs)

-- 
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].

Reply via email to