cchung100m closed pull request #13815: [issue_12205 - PART1] solve pylint errors in examples with issue no.12205
URL: https://github.com/apache/incubator-mxnet/pull/13815
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

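For reviewers who want to reproduce the lint results locally, here is a minimal sketch; it assumes pylint is installed, that it is run from the repository root, and the file list is only an illustrative sample of the files touched by this PR:

# Minimal sketch: run pylint over a few of the example files touched by this PR.
# Assumes pylint is installed and this script is executed from the repository
# root; the file list is illustrative, not the full set changed here.
import subprocess

EXAMPLE_FILES = [
    "example/autoencoder/model.py",
    "example/bayesian-methods/algos.py",
    "example/caffe/caffe_net.py",
    "example/capsnet/capsulenet.py",
]

for path in EXAMPLE_FILES:
    # pylint exits non-zero when it emits messages; subprocess.run does not
    # raise here because check is left at its default, so the loop continues.
    result = subprocess.run(["pylint", path], capture_output=True, text=True)
    print(result.stdout)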

diff --git a/example/autoencoder/mnist_sae.py b/example/autoencoder/mnist_sae.py
index 886f2a16a86..04e4e75f665 100644
--- a/example/autoencoder/mnist_sae.py
+++ b/example/autoencoder/mnist_sae.py
@@ -21,8 +21,8 @@
 import argparse
 import logging
 
-import mxnet as mx
 import numpy as np
+import mxnet as mx
 import data
 from autoencoder import AutoEncoderModel
 
diff --git a/example/autoencoder/model.py b/example/autoencoder/model.py
index 9b6185c9fd1..b388c551387 100644
--- a/example/autoencoder/model.py
+++ b/example/autoencoder/model.py
@@ -18,8 +18,9 @@
 # pylint: disable=missing-docstring
 from __future__ import print_function
 
-import mxnet as mx
 import numpy as np
+import mxnet as mx
+
 try:
     import cPickle as pickle
 except ImportError:
@@ -53,7 +54,7 @@ def extract_feature(sym, args, auxs, data_iter, N, xpu=mx.cpu()):
 
 
 class MXModel(object):
-    def __init__(self, xpu=mx.cpu(), *args, **kwargs):
+    def __init__(self, *args, xpu=mx.cpu(), **kwargs):
         self.xpu = xpu
         self.loss = None
         self.args = {}
diff --git a/example/autoencoder/solver.py b/example/autoencoder/solver.py
index 0c990ce7423..79fe5c69add 100644
--- a/example/autoencoder/solver.py
+++ b/example/autoencoder/solver.py
@@ -19,9 +19,8 @@
 from __future__ import print_function
 
 import logging
-
-import mxnet as mx
 import numpy as np
+import mxnet as mx
 
 
 class Monitor(object):
diff --git a/example/bayesian-methods/algos.py b/example/bayesian-methods/algos.py
index f7b36207079..91f17f88681 100644
--- a/example/bayesian-methods/algos.py
+++ b/example/bayesian-methods/algos.py
@@ -14,13 +14,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+"""
+Create implementation of algorithms of HMC, stepHMC, SGD, SGLD and DistilledSGLD
+"""
 from __future__ import print_function
+import time
+import numpy
 import mxnet as mx
 import mxnet.ndarray as nd
-import time
-import logging
-from utils import *
+from utils import copy_param, get_executor, sample_test_regression, sample_test_acc
 
 
 def calc_potential(exe, params, label_name, noise_precision, prior_precision):
@@ -35,6 +37,9 @@ def calc_potential(exe, params, label_name, noise_precision, prior_precision):
 
 
 def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
+    """
+    Calculate gradient
+    """
     exe.copy_params_from(params)
     exe.arg_dict['data'][:] = X
     if outgrad_f is None:
@@ -48,8 +53,10 @@ def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
         v.wait_to_read()
 
 
-def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10,
-             eps=1E-6):
+def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10, eps=1E-6):
+    """
+    Generate the implementation of step HMC
+    """
     init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
     end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
     init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
@@ -102,6 +109,9 @@ def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_preci
 def HMC(sym, data_inputs, X, Y, X_test, Y_test, sample_num,
         initializer=None, noise_precision=1 / 9.0, prior_precision=0.1,
         learning_rate=1E-6, L=10, dev=mx.gpu()):
+    """
+    Generate the implementation of HMC
+    """
     label_key = list(set(data_inputs.keys()) - set(['data']))[0]
     exe, exe_params, exe_grads, _ = get_executor(sym, dev, data_inputs, initializer)
     exe.arg_dict['data'][:] = X
@@ -134,6 +144,9 @@ def SGD(sym, data_inputs, X, Y, X_test, Y_test, total_iter_num,
         out_grad_f=None,
         initializer=None,
         minibatch_size=100, dev=mx.gpu()):
+    """
+    Generate the implementation of SGD
+    """
     if out_grad_f is None:
         label_key = list(set(data_inputs.keys()) - set(['data']))[0]
     exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -173,6 +186,9 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
          initializer=None,
          minibatch_size=100, thin_interval=100, burn_in_iter_num=1000, task='classification',
          dev=mx.gpu()):
+    """
+    Generate the implementation of SGLD
+    """
     if out_grad_f is None:
         label_key = list(set(data_inputs.keys()) - set(['data']))[0]
     exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -200,7 +216,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
         if i < burn_in_iter_num:
             continue
         else:
-            if 0 == (i - burn_in_iter_num) % thin_interval:
+            if (i - burn_in_iter_num) % thin_interval == 0:
                 if optimizer.lr_scheduler is not None:
                     lr = optimizer.lr_scheduler(optimizer.num_update)
                 else:
@@ -238,6 +254,9 @@ def DistilledSGLD(teacher_sym, student_sym,
                   minibatch_size=100,
                   task='classification',
                   dev=mx.gpu()):
+    """
+    Generate the implementation of DistilledSGLD
+    """
     teacher_exe, teacher_params, teacher_params_grad, _ = \
         get_executor(teacher_sym, dev, teacher_data_inputs, teacher_initializer)
     student_exe, student_params, student_params_grad, _ = \
@@ -323,13 +342,14 @@ def DistilledSGLD(teacher_sym, student_sym,
                     sample_test_acc(teacher_exe, X=X, Y=Y, label_num=10,
                                     minibatch_size=minibatch_size)
                 print("Student: Test ACC %d/%d=%f, Train ACC %d/%d=%f" % 
(test_correct, test_total,
-                                                    test_acc, train_correct, 
train_total, train_acc))
+                                                                          
test_acc, train_correct,
+                                                                          
train_total, train_acc))
                 print("Teacher: Test ACC %d/%d=%f, Train ACC %d/%d=%f" \
                       % (teacher_test_correct, teacher_test_total, teacher_test_acc,
                          teacher_train_correct, teacher_train_total, teacher_train_acc))
             else:
                 print("Current Iter Num: %d" % (i + 1), "Time Spent: %f" % 
(end - start), "MSE:",
-                       sample_test_regression(exe=student_exe, X=X_test, 
Y=Y_test,
+                      sample_test_regression(exe=student_exe, X=X_test, 
Y=Y_test,
                                              minibatch_size=minibatch_size,
                                              save_path='regression_DSGLD.txt'))
             start = time.time()
diff --git a/example/bayesian-methods/bdk_demo.py b/example/bayesian-methods/bdk_demo.py
index 145dac10e2a..3c3d1f8c2f1 100644
--- a/example/bayesian-methods/bdk_demo.py
+++ b/example/bayesian-methods/bdk_demo.py
@@ -14,21 +14,25 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+"""
+Run Stochastic Gradient Langevin Dynamics (SGLD) and Bayesian Dark Knowledge (BDK)
+"""
 from __future__ import print_function
-import mxnet as mx
-import mxnet.ndarray as nd
+import argparse
+import time
 import numpy
-import logging
 import matplotlib.pyplot as plt
-from scipy.stats import gaussian_kde
-import argparse
-from algos import *
-from data_loader import *
-from utils import *
+import mxnet as mx
+import mxnet.ndarray as nd
+from algos import HMC, SGD, SGLD, DistilledSGLD
+from data_loader import load_mnist, load_toy, load_synthetic
+from utils import BiasXavier, SGLDScheduler
 
 
 class CrossEntropySoftmax(mx.operator.NumpyOp):
+    """
+    Calculate CrossEntropy softmax function
+    """
     def __init__(self):
         super(CrossEntropySoftmax, self).__init__(False)
 
@@ -58,6 +62,9 @@ def backward(self, out_grad, in_data, out_data, in_grad):
 
 
 class LogSoftmax(mx.operator.NumpyOp):
+    """
+    Generate helper functions to evaluate softmax loss function
+    """
     def __init__(self):
         super(LogSoftmax, self).__init__(False)
 
@@ -103,6 +110,9 @@ def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precisi
 
 
 def get_mnist_sym(output_op=None, num_hidden=400):
+    """
+    get symbol of mnist
+    """
     net = mx.symbol.Variable('data')
     net = mx.symbol.FullyConnected(data=net, name='mnist_fc1', num_hidden=num_hidden)
     net = mx.symbol.Activation(data=net, name='mnist_relu1', act_type="relu")
@@ -117,6 +127,9 @@ def get_mnist_sym(output_op=None, num_hidden=400):
 
 
 def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
+    """
+    Get synthetic gradient value
+    """
     if grad is None:
         grad = nd.empty(theta.shape, theta.context)
     theta1 = theta.asnumpy()[0]
@@ -128,17 +141,18 @@ def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None
         -(X - theta1 - theta2) ** 2 / (2 * vx))
     grad_npy = numpy.zeros(theta.shape)
     grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
-                                    + numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
-                                    X - theta1 - theta2) / vx) / denominator).sum() \
-                  + theta1 / v1
-    grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
-    X - theta1 - theta2) / vx) / denominator).sum() \
-                  + theta2 / v2
+                                    + numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
+                                    (X - theta1 - theta2) / vx) / denominator).sum() + theta1 / v1
+    grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
+                                    (X - theta1 - theta2) / vx) / denominator).sum() + theta2 / v2
     grad[:] = grad_npy
     return grad
 
 
 def get_toy_sym(teacher=True, teacher_noise_precision=None):
+    """
+    Get toy symbol
+    """
     if teacher:
         net = mx.symbol.Variable('data')
     net = mx.symbol.FullyConnected(data=net, name='teacher_fc1', num_hidden=100)
@@ -160,8 +174,8 @@ def dev():
     return mx.gpu()
 
 
-def run_mnist_SGD(training_num=50000):
-    X, Y, X_test, Y_test = load_mnist(training_num)
+def run_mnist_SGD(num_training=50000):
+    X, Y, X_test, Y_test = load_mnist(num_training)
     minibatch_size = 100
     net = get_mnist_sym()
     data_shape = (minibatch_size,) + X.shape[1::]
@@ -175,8 +189,8 @@ def run_mnist_SGD(training_num=50000):
                              lr=5E-6, prior_precision=1.0, minibatch_size=100)
 
 
-def run_mnist_SGLD(training_num=50000):
-    X, Y, X_test, Y_test = load_mnist(training_num)
+def run_mnist_SGLD(num_training=50000):
+    X, Y, X_test, Y_test = load_mnist(num_training)
     minibatch_size = 100
     net = get_mnist_sym()
     data_shape = (minibatch_size,) + X.shape[1::]
@@ -191,10 +205,13 @@ def run_mnist_SGLD(training_num=50000):
                             thin_interval=100, burn_in_iter_num=1000)
 
 
-def run_mnist_DistilledSGLD(training_num=50000):
-    X, Y, X_test, Y_test = load_mnist(training_num)
+def run_mnist_DistilledSGLD(num_training=50000):
+    """
+    Run DistilledSGLD on mnist dataset
+    """
+    X, Y, X_test, Y_test = load_mnist(num_training)
     minibatch_size = 100
-    if training_num >= 10000:
+    if num_training >= 10000:
         num_hidden = 800
         total_iter_num = 1000000
         teacher_learning_rate = 1E-6
@@ -235,6 +252,9 @@ def run_mnist_DistilledSGLD(training_num=50000):
 
 
 def run_toy_SGLD():
+    """
+    Run SGLD on toy dataset
+    """
     X, Y, X_test, Y_test = load_toy()
     minibatch_size = 1
     teacher_noise_precision = 1.0 / 9.0
@@ -243,20 +263,28 @@ def run_toy_SGLD():
     data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                    'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
     initializer = mx.init.Uniform(0.07)
-    exe, params, _ = \
-        SGLD(sym=net, data_inputs=data_inputs,
-             X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
-             initializer=initializer,
-             learning_rate=1E-4,
-             #         lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
-             prior_precision=0.1,
-             burn_in_iter_num=1000,
-             thin_interval=10,
-             task='regression',
-             minibatch_size=minibatch_size, dev=dev())
+    exe, params = SGLD(sym=net,
+                       data_inputs=data_inputs,
+                       X=X,
+                       Y=Y,
+                       X_test=X_test,
+                       Y_test=Y_test,
+                       total_iter_num=50000,
+                       initializer=initializer,
+                       learning_rate=1E-4,
+                       # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
+                       prior_precision=0.1,
+                       burn_in_iter_num=1000,
+                       thin_interval=10,
+                       task='regression',
+                       minibatch_size=minibatch_size,
+                       dev=dev())  # disable=unbalanced-tuple-unpacking
 
 
 def run_toy_DistilledSGLD():
+    """
+    Run DistilledSGLD on toy dataset
+    """
     X, Y, X_test, Y_test = load_toy()
     minibatch_size = 1
     teacher_noise_precision = 1.0
@@ -288,6 +316,9 @@ def run_toy_DistilledSGLD():
 
 
 def run_toy_HMC():
+    """
+    Run HMC on toy dataset
+    """
     X, Y, X_test, Y_test = load_toy()
     minibatch_size = Y.shape[0]
     noise_precision = 1 / 9.0
@@ -302,6 +333,9 @@ def run_toy_HMC():
 
 
 def run_synthetic_SGLD():
+    """
+    Run synthetic SGLD
+    """
     theta1 = 0
     theta2 = 1
     sigma1 = numpy.sqrt(10)
@@ -322,14 +356,14 @@ def run_synthetic_SGLD():
     grad = nd.empty((2,), mx.cpu())
     samples = numpy.zeros((2, total_iter_num))
     start = time.time()
-    for i in xrange(total_iter_num):
+    for i in range(total_iter_num):
         if (i + 1) % 100000 == 0:
             end = time.time()
             print("Iter:%d, Time spent: %f" % (i + 1, end - start))
             start = time.time()
         ind = numpy.random.randint(0, X.shape[0])
-        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax, rescale_grad=
-        X.shape[0] / float(minibatch_size), grad=grad)
+        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
+                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
         updater('theta', grad, theta)
         samples[:, i] = theta.asnumpy()
     plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
@@ -353,18 +387,18 @@ def run_synthetic_SGLD():
     args = parser.parse_args()
     training_num = args.training
     if args.dataset == 1:
-        if 0 == args.algorithm:
+        if args.algorithm == 0:
             run_mnist_SGD(training_num)
-        elif 1 == args.algorithm:
+        elif args.algorithm == 1:
             run_mnist_SGLD(training_num)
         else:
             run_mnist_DistilledSGLD(training_num)
     elif args.dataset == 0:
-        if 1 == args.algorithm:
+        if args.algorithm == 1:
             run_toy_SGLD()
-        elif 2 == args.algorithm:
+        elif args.algorithm == 2:
             run_toy_DistilledSGLD()
-        elif 3 == args.algorithm:
+        elif args.algorithm == 3:
             run_toy_HMC()
     else:
         run_synthetic_SGLD()
diff --git a/example/bayesian-methods/data_loader.py b/example/bayesian-methods/data_loader.py
index 92ca0cfb3a6..a800fe72d6a 100644
--- a/example/bayesian-methods/data_loader.py
+++ b/example/bayesian-methods/data_loader.py
@@ -14,14 +14,19 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+"""
+Create helper functions to load mnist dataset and toy dataset
+"""
 from __future__ import print_function
-import numpy
 import os
 import ssl
+import numpy
 
 
 def load_mnist(training_num=50000):
+    """
+    Load mnist dataset
+    """
     data_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), 'mnist.npz')
     if not os.path.isfile(data_path):
         from six.moves import urllib
diff --git a/example/bayesian-methods/utils.py b/example/bayesian-methods/utils.py
index a2744373e87..e1e6e34fc61 100644
--- a/example/bayesian-methods/utils.py
+++ b/example/bayesian-methods/utils.py
@@ -14,11 +14,12 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+"""
+Generate helper functions to Stochastic Gradient Langevin Dynamics (SGLD) and Bayesian Dark Knowledge (BDK)
+"""
+import numpy
 import mxnet as mx
 import mxnet.ndarray as nd
-import numpy
-import logging
 
 
 class BiasXavier(mx.initializer.Xavier):
@@ -26,7 +27,11 @@ def _init_bias(self, _, arr):
         scale = numpy.sqrt(self.magnitude / arr.shape[0])
         mx.random.uniform(-scale, scale, out=arr)
 
+
 class SGLDScheduler(mx.lr_scheduler.LRScheduler):
+    """
+    Create SGLDScheduler class
+    """
     def __init__(self, begin_rate, end_rate, total_iter_num, factor):
         super(SGLDScheduler, self).__init__()
         if factor >= 1.0:
@@ -44,7 +49,11 @@ def __call__(self, num_update):
         self.count += 1
         return self.base_lr
 
+
 def get_executor(sym, ctx, data_inputs, initializer=None):
+    """
+    Get executor to Stochastic Gradient Langevin Dynamics and/or Bayesian Dark Knowledge
+    """
     data_shapes = {k: v.shape for k, v in data_inputs.items()}
     arg_names = sym.list_arguments()
     aux_names = sym.list_auxiliary_states()
@@ -62,14 +71,22 @@ def get_executor(sym, ctx, data_inputs, initializer=None):
             initializer(k, v)
     return exe, params, params_grad, aux_states
 
+
 def copy_param(exe, new_param=None):
+    """
+    Create copy of parameters
+    """
     if new_param is None:
-        new_param = {k: nd.empty(v.shape, ctx=mx.cpu()) for k,v in exe.arg_dict.items()}
+        new_param = {k: nd.empty(v.shape, ctx=mx.cpu()) for k, v in exe.arg_dict.items()}
     for k, v in new_param.items():
         exe.arg_dict[k].copyto(v)
     return new_param
 
+
+def sample_test_acc(exe, X, Y, sample_pool=None, label_num=None, minibatch_size=100):
+    """
+    Generate sample test to evaluate accuracy
+    """
     if label_num is None:
         pred = numpy.zeros((X.shape[0],)).astype('float32')
     else:
@@ -89,12 +106,12 @@ def sample_test_acc(exe, X, Y, sample_pool=None, label_num=None, minibatch_size=
     else:
         old_param = copy_param(exe)
         for sample in sample_pool:
-            if type(sample) is list:
+            if isinstance(sample, list):
                 denominator += sample[0]
             else:
                 denominator += 1.0
         for sample in sample_pool:
-            if type(sample) is list:
+            if isinstance(sample, list):
                 ratio = sample[0]/denominator
                 param = sample[1]
             else:
@@ -118,11 +135,14 @@ def sample_test_acc(exe, X, Y, sample_pool=None, label_num=None, minibatch_size=
 
 
 def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100, save_path="regression.txt"):
+    """
+    Generate a sample test regression
+    """
     old_param = copy_param(exe)
     if sample_pool is not None:
         pred = numpy.zeros(Y.shape + (len(sample_pool),))
         ratio = numpy.zeros((len(sample_pool),))
-        if type(sample_pool[0]) is list:
+        if isinstance(sample_pool[0], list):
             denominator = sum(sample[0] for sample in sample_pool)
             for i, sample in enumerate(sample_pool):
                 ratio[i] = sample[0]/float(denominator)
@@ -130,7 +150,7 @@ def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100, save
             ratio[:] = 1.0/ Y.shape[0]
         iterator = mx.io.NDArrayIter(data=X, label=Y, batch_size=minibatch_size, shuffle=False)
         for i, sample in enumerate(sample_pool):
-            if type(sample) is list:
+            if isinstance(sample, list):
                 sample_param = sample[1]
             else:
                 sample_param = sample
@@ -146,7 +166,7 @@ def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100, save
                 curr_instance += batch_len
         mean = pred.mean(axis=2)
         var = pred.std(axis=2)**2
-        #print numpy.concatenate((Y, mean), axis=1)
+        # print numpy.concatenate((Y, mean), axis=1)
         mse = numpy.square(Y.reshape((Y.shape[0], )) - mean.reshape((mean.shape[0], ))).mean()
         numpy.savetxt(save_path, numpy.concatenate((mean, var), axis=1))
     else:
@@ -157,15 +177,21 @@ def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100, save
         for batch in iterator:
             exe.arg_dict['data'][:] = batch.data[0]
             exe.forward(is_train=False)
-            mean_var[curr_instance:curr_instance + minibatch_size - batch.pad, 0] = exe.outputs[0].asnumpy()[:minibatch_size - batch.pad].flatten()
-            mean_var[curr_instance:curr_instance + minibatch_size - batch.pad, 1] = numpy.exp(exe.outputs[1].asnumpy())[:minibatch_size - batch.pad].flatten()
+            mean_var[curr_instance:curr_instance + minibatch_size - batch.pad, 0] =\
+                exe.outputs[0].asnumpy()[:minibatch_size - batch.pad].flatten()
+            mean_var[curr_instance:curr_instance + minibatch_size - batch.pad, 1] = \
+                numpy.exp(exe.outputs[1].asnumpy())[:minibatch_size - batch.pad].flatten()
             curr_instance += minibatch_size - batch.pad
         mse = numpy.square(Y.reshape((Y.shape[0],)) - mean_var[:, 0]).mean()
         numpy.savetxt(save_path, mean_var)
     exe.copy_params_from(old_param)
     return mse
 
+
 def pred_test(testing_data, exe, param_list=None, save_path=""):
+    """
+    Generate prediction on testset
+    """
     ret = numpy.zeros((testing_data.shape[0], 2))
     if param_list is None:
         for i in range(testing_data.shape[0]):
@@ -177,8 +203,8 @@ def pred_test(testing_data, exe, param_list=None, save_path=""):
     else:
         for i in range(testing_data.shape[0]):
             pred = numpy.zeros((len(param_list),))
-            for j in range(len(param_list)):
-                exe.copy_params_from(param_list[j])
+            for (j, param) in enumerate(param_list):
+                exe.copy_params_from(param)
                 exe.arg_dict['data'][:] = testing_data[i, 0]
                 exe.forward(is_train=False)
                 pred[j] = exe.outputs[0].asnumpy()
diff --git a/example/caffe/caffe_net.py b/example/caffe/caffe_net.py
index 0459c901e1c..aa607a235f8 100644
--- a/example/caffe/caffe_net.py
+++ b/example/caffe/caffe_net.py
@@ -14,29 +14,37 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+"""
+Generate helper functions to load Caffe into MXNet
+"""
+import argparse
 import mxnet as mx
 from data import get_iterator
-import argparse
 import train_model
 
+
 def get_mlp():
     """
     multi-layer perceptron
     """
     data = mx.symbol.Variable('data')
-    fc1  = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
+    fc1 = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1',
+                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
     act1 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
-    fc2  = mx.symbol.CaffeOp(data_0=act1, num_weight=2, name='fc2', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
+    fc2 = mx.symbol.CaffeOp(data_0=act1, num_weight=2, name='fc2',
+                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
     act2 = mx.symbol.CaffeOp(data_0=fc2, prototxt="layer{type:\"TanH\"}")
-    fc3 = mx.symbol.CaffeOp(data_0=act2, num_weight=2, name='fc3', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10}}")
+    fc3 = mx.symbol.CaffeOp(data_0=act2, num_weight=2, name='fc3',
+                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10}}")
     if use_caffe_loss:
         label = mx.symbol.Variable('softmax_label')
-        mlp = mx.symbol.CaffeLoss(data=fc3, label=label, grad_scale=1, name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")
+        mlp = mx.symbol.CaffeLoss(data=fc3, label=label, grad_scale=1, name='softmax',
+                                  prototxt="layer{type:\"SoftmaxWithLoss\"}")
     else:
         mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
     return mlp
 
+
 def get_lenet():
     """
     LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick
@@ -46,32 +54,46 @@ def get_lenet():
     data = mx.symbol.Variable('data')
 
     # first conv
-    conv1 = mx.symbol.CaffeOp(data_0=data, num_weight=2, prototxt="layer{type:\"Convolution\" convolution_param { num_output: 20 kernel_size: 5 stride: 1} }")
+    conv1 = mx.symbol.CaffeOp(data_0=data, num_weight=2,
+                              prototxt="layer{type:\"Convolution\" "
+                                       "convolution_param { num_output: 20 kernel_size: 5 stride: 1} }")
     act1 = mx.symbol.CaffeOp(data_0=conv1, prototxt="layer{type:\"TanH\"}")
-    pool1 = mx.symbol.CaffeOp(data_0=act1, prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
+    pool1 = mx.symbol.CaffeOp(data_0=act1,
+                              prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
 
     # second conv
-    conv2 = mx.symbol.CaffeOp(data_0=pool1, num_weight=2, prototxt="layer{type:\"Convolution\" convolution_param { num_output: 50 kernel_size: 5 stride: 1} }")
+    conv2 = mx.symbol.CaffeOp(data_0=pool1, num_weight=2,
+                              prototxt="layer{type:\"Convolution\" "
+                                       "convolution_param { num_output: 50 kernel_size: 5 stride: 1} }")
     act2 = mx.symbol.CaffeOp(data_0=conv2, prototxt="layer{type:\"TanH\"}")
-    pool2 = mx.symbol.CaffeOp(data_0=act2, prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
+    pool2 = mx.symbol.CaffeOp(data_0=act2,
+                              prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
 
-    fc1 = mx.symbol.CaffeOp(data_0=pool2, num_weight=2, prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 500} }")
+    fc1 = mx.symbol.CaffeOp(data_0=pool2, num_weight=2,
+                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 500} }")
     act3 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
 
     # second fullc
-    fc2 = mx.symbol.CaffeOp(data_0=act3, num_weight=2, prototxt="layer{type:\"InnerProduct\"inner_product_param{num_output: 10} }")
+    fc2 = mx.symbol.CaffeOp(data_0=act3, num_weight=2,
+                            prototxt="layer{type:\"InnerProduct\"inner_product_param{num_output: 10} }")
     if use_caffe_loss:
         label = mx.symbol.Variable('softmax_label')
-        lenet = mx.symbol.CaffeLoss(data=fc2, label=label, grad_scale=1, name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")
+        lenet = mx.symbol.CaffeLoss(data=fc2, label=label, grad_scale=1, name='softmax',
+                                    prototxt="layer{type:\"SoftmaxWithLoss\"}")
     else:
         lenet = mx.symbol.SoftmaxOutput(data=fc2, name='softmax')
     return lenet
 
+
 def get_network_from_json_file(file_name):
     network = mx.sym.load(file_name)
     return network
 
+
 def parse_args():
+    """
+    Parse the arguments
+    """
     parser = argparse.ArgumentParser(description='train an image classifier on mnist')
     parser.add_argument('--network', type=str, default='lenet',
                         help='the cnn to use (mlp | lenet | <path to network json file>')
diff --git a/example/caffe/data.py b/example/caffe/data.py
index 15276c42360..d1760204682 100644
--- a/example/caffe/data.py
+++ b/example/caffe/data.py
@@ -14,42 +14,48 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+"""
+Create the helper functions to mnist dataset for Caffe operators in MXNet
+"""
 import mxnet as mx
 from mxnet.test_utils import get_mnist_ubyte
 
+
 def get_iterator(data_shape, use_caffe_data):
+    """
+    Generate the iterator of mnist dataset
+    """
     def get_iterator_impl_mnist(args, kv):
         """return train and val iterators for mnist"""
         # download data
         get_mnist_ubyte()
         flat = False if len(data_shape) != 1 else True
 
-        train           = mx.io.MNISTIter(
-            image       = "data/train-images-idx3-ubyte",
-            label       = "data/train-labels-idx1-ubyte",
-            input_shape = data_shape,
-            batch_size  = args.batch_size,
-            shuffle     = True,
-            flat        = flat,
-            num_parts   = kv.num_workers,
-            part_index  = kv.rank)
+        train = mx.io.MNISTIter(
+            image="data/train-images-idx3-ubyte",
+            label="data/train-labels-idx1-ubyte",
+            input_shape=data_shape,
+            batch_size=args.batch_size,
+            shuffle=True,
+            flat=flat,
+            num_parts=kv.num_workers,
+            part_index=kv.rank)
 
         val = mx.io.MNISTIter(
-            image       = "data/t10k-images-idx3-ubyte",
-            label       = "data/t10k-labels-idx1-ubyte",
-            input_shape = data_shape,
-            batch_size  = args.batch_size,
-            flat        = flat,
-            num_parts   = kv.num_workers,
-            part_index  = kv.rank)
+            image="data/t10k-images-idx3-ubyte",
+            label="data/t10k-labels-idx1-ubyte",
+            input_shape=data_shape,
+            batch_size=args.batch_size,
+            flat=flat,
+            num_parts=kv.num_workers,
+            part_index=kv.rank)
 
         return (train, val)
 
     def get_iterator_impl_caffe(args, kv):
         flat = False if len(data_shape) != 1 else True
         train = mx.io.CaffeDataIter(
-            prototxt =
+            prototxt=
             'layer { \
                 name: "mnist" \
                 type: "Data" \
@@ -67,13 +73,13 @@ def get_iterator_impl_caffe(args, kv):
                     backend: LMDB \
                 } \
             }',
-            flat           = flat,
-            num_examples   = 60000
+            flat=flat,
+            num_examples=60000
             # float32 is the default, so left out here in order to illustrate
         )
 
         val = mx.io.CaffeDataIter(
-            prototxt =
+            prototxt=
             'layer { \
                 name: "mnist" \
                 type: "Data" \
@@ -91,9 +97,9 @@ def get_iterator_impl_caffe(args, kv):
                     backend: LMDB \
                 } \
             }',
-            flat           = flat,
-            num_examples   = 10000,
-            dtype          = "float32" # float32 is the default
+            flat=flat,
+            num_examples=10000,
+            dtype="float32"  # float32 is the default
         )
 
         return train, val
diff --git a/example/caffe/train_model.py b/example/caffe/train_model.py
index 4290e71063e..a600cd07124 100644
--- a/example/caffe/train_model.py
+++ b/example/caffe/train_model.py
@@ -14,12 +14,18 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-import mxnet as mx
-import logging
+"""
+Train module with using Caffe operator in MXNet
+"""
 import os
+import logging
+import mxnet as mx
+
 
 def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
+    """
+    Train the model with using Caffe operator in MXNet
+    """
     # kvstore
     kv = mx.kvstore.create(args.kv_store)
 
@@ -74,8 +80,8 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
 
     if 'lr_factor' in args and args.lr_factor < 1:
         model_args['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
-            step = max(int(epoch_size * args.lr_factor_epoch), 1),
-            factor = args.lr_factor)
+            step=max(int(epoch_size * args.lr_factor_epoch), 1),
+            factor=args.lr_factor)
 
     if 'clip_gradient' in args and args.clip_gradient is not None:
         model_args['clip_gradient'] = args.clip_gradient
@@ -85,12 +91,11 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
             args.gpus is None or len(args.gpus.split(',')) is 1):
         kv = None
 
-
     mod = mx.mod.Module(network, context=devs)
 
     if eval_metrics is None:
         eval_metrics = ['accuracy']
-        ## TopKAccuracy only allows top_k > 1
+        # TopKAccuracy only allows top_k > 1
         for top_k in [5, 10, 20]:
             eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=top_k))
 
@@ -102,8 +107,7 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
     batch_end_callback.append(mx.callback.Speedometer(args.batch_size, 50))
 
     mod.fit(train_data=train, eval_metric=eval_metrics, eval_data=val, optimizer='sgd',
-        optimizer_params={'learning_rate':args.lr, 'momentum': 0.9, 'wd': 0.00001},
-        num_epoch=args.num_epochs, batch_end_callback=batch_end_callback,
-        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
-        kvstore=kv, epoch_end_callback=checkpoint, **model_args)
-
+            optimizer_params={'learning_rate':args.lr, 'momentum': 0.9, 'wd': 0.00001},
+            num_epoch=args.num_epochs, batch_end_callback=batch_end_callback,
+            initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
+            kvstore=kv, epoch_end_callback=checkpoint, **model_args)
diff --git a/example/capsnet/capsulelayers.py b/example/capsnet/capsulelayers.py
index 5ac4fad4914..774625c71ac 100644
--- a/example/capsnet/capsulelayers.py
+++ b/example/capsnet/capsulelayers.py
@@ -14,7 +14,9 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+"""
+Create layers of capsule net
+"""
 import mxnet as mx
 
 
@@ -98,7 +100,8 @@ def __call__(self, data):
                     mx.sym.sum(mx.sym.broadcast_mul(c, inputs_hat_stopped, name='broadcast_mul_' + str(i)),
                                axis=1, keepdims=True,
                                name='sum_' + str(i)), name='output_' + str(i), squash_axis=4)
-                bias_ = bias_ + mx.sym.sum(mx.sym.broadcast_mul(c, inputs_hat_stopped, name='bias_broadcast_mul' + str(i)),
+                bias_ = bias_ + mx.sym.sum(mx.sym.broadcast_mul(c, inputs_hat_stopped,
+                                                                name='bias_broadcast_mul' + str(i)),
                                            axis=4,
                                            keepdims=True, name='bias_' + str(i))
 
diff --git a/example/capsnet/capsulenet.py b/example/capsnet/capsulenet.py
index 67108757bf3..a0695f67dd3 100644
--- a/example/capsnet/capsulenet.py
+++ b/example/capsnet/capsulenet.py
@@ -14,24 +14,31 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import mxnet as mx
-import numpy as np
+"""
+Generate MXNet implementation of CapsNet
+"""
 import os
 import re
 import gzip
 import struct
+import numpy as np
 import scipy.ndimage as ndi
+import mxnet as mx
 from capsulelayers import primary_caps, CapsuleLayer
 
 from mxboard import SummaryWriter
 
+
 def margin_loss(y_true, y_pred):
     loss = y_true * mx.sym.square(mx.sym.maximum(0., 0.9 - y_pred)) +\
         0.5 * (1 - y_true) * mx.sym.square(mx.sym.maximum(0., y_pred - 0.1))
     return mx.sym.mean(data=mx.sym.sum(loss, 1))
 
 
-def capsnet(batch_size, n_class, num_routing,recon_loss_weight):
+def capsnet(batch_size, n_class, num_routing, recon_loss_weight):
+    """
+    Create CapsNet
+    """
     # data.shape = [batch_size, 1, 28, 28]
     data = mx.sym.Variable('data')
 
@@ -107,7 +114,8 @@ def read_data(label_url, image_url):
         label = np.fromstring(flbl.read(), dtype=np.int8)
     with gzip.open(download_data(image_url), 'rb') as fimg:
         magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
-        image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
+        image = np.fromstring(fimg.read(), dtype=np.uint8)
+        np.reshape(image, len(label), (rows, cols))
     return label, image
 
 
@@ -116,10 +124,13 @@ def to4d(img):
 
 
 class LossMetric(mx.metric.EvalMetric):
-    def __init__(self, batch_size, num_gpu):
+    """
+    Evaluate the loss function
+    """
+    def __init__(self, batch_size, num_gpus):
         super(LossMetric, self).__init__('LossMetric')
         self.batch_size = batch_size
-        self.num_gpu = num_gpu
+        self.num_gpu = num_gpus
         self.sum_metric = 0
         self.num_inst = 0
         self.loss = 0.0
@@ -130,6 +141,9 @@ def __init__(self, batch_size, num_gpu):
         self.n_batch = 0
 
     def update(self, labels, preds):
+        """
+        Update the hyper-parameters and loss of CapsNet
+        """
         batch_sum_metric = 0
         batch_num_inst = 0
         for label, pred_outcaps in zip(labels[0], preds[0]):
@@ -146,7 +160,7 @@ def update(self, labels, preds):
         self.batch_sum_metric = batch_sum_metric
         self.batch_num_inst = batch_num_inst
         self.batch_loss = batch_loss
-        self.n_batch += 1 
+        self.n_batch += 1
 
     def get_name_value(self):
         acc = float(self.sum_metric)/float(self.num_inst)
@@ -184,6 +198,9 @@ def __call__(self, num_update):
 
 
 def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix, decay):
+    """
+    Run training to CapsNet
+    """
     summary_writer = SummaryWriter(args.tblog_dir)
     lr_scheduler = SimpleLRScheduler(learning_rate)
     optimizer_params = {'lr_scheduler': lr_scheduler}
@@ -218,7 +235,8 @@ def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix, deca
         summary_writer.add_scalar('val_loss', val_loss, n_epoch)
         summary_writer.add_scalar('val_recon_err', val_recon_err, n_epoch)
 
-        print('Epoch[%d] train acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch, train_acc, train_loss, train_recon_err))
+        print('Epoch[%d] train acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch, train_acc, train_loss,
+                                                                        train_recon_err))
         print('Epoch[%d] val acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch, val_acc, val_loss, val_recon_err))
         print('SAVE CHECKPOINT')
 
@@ -227,10 +245,10 @@ def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix, deca
         lr_scheduler.learning_rate = learning_rate * (decay ** n_epoch)
 
 
-def apply_transform(x,
-                    transform_matrix,
-                    fill_mode='nearest',
-                    cval=0.):
+def apply_transform(x, transform_matrix, fill_mode='nearest', cval=0.):
+    """
+    Apply transform on nd.array
+    """
     x = np.rollaxis(x, 0, 0)
     final_affine_matrix = transform_matrix[:2, :2]
     final_offset = transform_matrix[:2, 2]
@@ -255,30 +273,53 @@ def random_shift(x, width_shift_fraction, height_shift_fraction):
     x = apply_transform(x, shift_matrix, 'nearest')
     return x
 
+
 def _shuffle(data, idx):
     """Shuffle the data."""
     shuffle_data = []
 
-    for k, v in data:
-        shuffle_data.append((k, mx.ndarray.array(v.asnumpy()[idx], v.context)))
+    for idx_k, idx_v in data:
+        shuffle_data.append((idx_k, mx.ndarray.array(idx_v.asnumpy()[idx], idx_v.context)))
 
     return shuffle_data
 
+
 class MNISTCustomIter(mx.io.NDArrayIter):
-    
+    """
+    Create custom iterator of mnist dataset
+    """
+    def __init__(self, data, label, batch_size, shuffle):
+        self.data = data
+        self.label = label
+        self.batch_size = batch_size
+        self.shuffle = shuffle
+        self.cursor = None
+
     def reset(self):
+        """
+        Reset class MNISTCustomIter(mx.io.NDArrayIter):
+        """
         # shuffle data
         if self.is_train:
             np.random.shuffle(self.idx)
             self.data = _shuffle(self.data, self.idx)
             self.label = _shuffle(self.label, self.idx)
+
         if self.last_batch_handle == 'roll_over' and self.cursor > self.num_data:
-            self.cursor = -self.batch_size + (self.cursor%self.num_data)%self.batch_size
+            self.cursor = -self.batch_size + (self.cursor % self.num_data) % self.batch_size
         else:
             self.cursor = -self.batch_size
+
     def set_is_train(self, is_train):
+        """
+        Set training flag
+        """
         self.is_train = is_train
+
     def next(self):
+        """
+        Generate next of iterator
+        """
         if self.iter_next():
             if self.is_train:
                 data_raw_list = self.getdata()
@@ -288,8 +329,7 @@ def next(self):
                 return mx.io.DataBatch(data=[mx.nd.array(data_shifted)], label=self.getlabel(),
                                        pad=self.getpad(), index=None)
             else:
-                 return mx.io.DataBatch(data=self.getdata(), label=self.getlabel(), \
-                                  pad=self.getpad(), index=None)
+                return mx.io.DataBatch(data=self.getdata(), label=self.getlabel(), pad=self.getpad(), index=None)
 
         else:
             raise StopIteration
@@ -298,10 +338,9 @@ def next(self):
 if __name__ == "__main__":
     # Read mnist data set
     path = 'http://yann.lecun.com/exdb/mnist/'
-    (train_lbl, train_img) = read_data(
-        path + 'train-labels-idx1-ubyte.gz', path + 'train-images-idx3-ubyte.gz')
-    (val_lbl, val_img) = read_data(
-        path + 't10k-labels-idx1-ubyte.gz', path + 't10k-images-idx3-ubyte.gz')
+    (train_lbl, train_img) = read_data(path + 'train-labels-idx1-ubyte.gz', path + 'train-images-idx3-ubyte.gz')
+    (val_lbl, val_img) = read_data(path + 't10k-labels-idx1-ubyte.gz', path + 't10k-images-idx3-ubyte.gz')
+
     # set batch size
     import argparse
     parser = argparse.ArgumentParser()
@@ -331,10 +370,13 @@ def next(self):
     # generate train_iter, val_iter
     train_iter = MNISTCustomIter(data=to4d(train_img), label=train_lbl, batch_size=int(args.batch_size), shuffle=True)
     train_iter.set_is_train(True)
-    val_iter = MNISTCustomIter(data=to4d(val_img), label=val_lbl, batch_size=int(args.batch_size),)
+    val_iter = MNISTCustomIter(data=to4d(val_img), label=val_lbl, batch_size=int(args.batch_size), shuffle=True)
     val_iter.set_is_train(False)
     # define capsnet
-    final_net = capsnet(batch_size=int(args.batch_size/num_gpu), n_class=10, num_routing=args.num_routing, recon_loss_weight=args.recon_loss_weight)
+    final_net = capsnet(batch_size=int(args.batch_size/num_gpu),
+                        n_class=10,
+                        num_routing=args.num_routing,
+                        recon_loss_weight=args.recon_loss_weight)
     # set metric
     loss_metric = LossMetric(args.batch_size/num_gpu, 1)
 
@@ -343,5 +385,6 @@ def next(self):
     module.bind(data_shapes=train_iter.provide_data,
                 label_shapes=val_iter.provide_label,
                 for_training=True)
+
     do_training(num_epoch=args.num_epoch, optimizer='adam', kvstore='device', learning_rate=args.lr,
                 model_prefix=args.model_prefix, decay=args.decay)
diff --git a/example/cnn_chinese_text_classification/data_helpers.py b/example/cnn_chinese_text_classification/data_helpers.py
index b3a13deec77..7030a98171e 100644
--- a/example/cnn_chinese_text_classification/data_helpers.py
+++ b/example/cnn_chinese_text_classification/data_helpers.py
@@ -14,6 +14,9 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+
+"""Help functions to support for implementing CNN + Highway Network for Chinese Text Classification in MXNet"""
+
 import codecs
 import itertools
 import os
@@ -40,19 +43,23 @@ def clean_str(string):
     string = re.sub(r"\'ll", " \'ll", string)
     string = re.sub(r",", " , ", string)
     string = re.sub(r"!", " ! ", string)
-    string = re.sub(r"\(", " \( ", string)
-    string = re.sub(r"\)", " \) ", string)
-    string = re.sub(r"\?", " \? ", string)
+    string = re.sub(r"\(", r" \( ", string)
+    string = re.sub(r"\)", r" \) ", string)
+    string = re.sub(r"\?", r" \? ", string)
     string = re.sub(r"\s{2,}", " ", string)
     return string.strip().lower()
 
 
 def get_chinese_text():
+    """
+    Download the chinese_text dataset and unzip it
+    """
     if not os.path.isdir("data/"):
         os.system("mkdir data/")
     if (not os.path.exists('data/pos.txt')) or \
        (not os.path.exists('data/neg')):
-        os.system("wget -q https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/chinese_text.zip -P data/")
+        os.system("wget -q https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/chinese_text.zip "
+                  "-P data/")
         os.chdir("./data")
         os.system("unzip -u chinese_text.zip")
         os.chdir("..")
@@ -92,8 +99,9 @@ def pad_sentences(sentences, padding_word="</s>"):
     """
     sequence_length = max(len(x) for x in sentences)
     padded_sentences = []
-    for i in range(len(sentences)):
-        sentence = sentences[i]
+    for i, element in enumerate(sentences):
+        print(i, element)
+        sentence = element
         num_padding = sequence_length - len(sentence)
         new_sentence = sentence + [padding_word] * num_padding
         padded_sentences.append(new_sentence)
@@ -116,30 +124,30 @@ def build_vocab(sentences):
 
 def build_input_data(sentences, labels, vocabulary):
     """
-    Maps sentencs and labels to vectors based on a vocabulary.
+    Maps sentences and labels to vectors based on a vocabulary.
     """
     x = np.array([[vocabulary[word] for word in sentence] for sentence in sentences])
     y = np.array(labels)
     return [x, y]
 
 
-def build_input_data_with_word2vec(sentences, labels, word2vec):
-    """Map sentences and labels to vectors based on a pretrained word2vec"""
+def build_input_data_with_word2vec(sentences, labels, word2vec_list):
+    """Map sentences and labels to vectors based on a pre-trained word2vec"""
     x_vec = []
     for sent in sentences:
         vec = []
         for word in sent:
-            if word in word2vec:
-                vec.append(word2vec[word])
+            if word in word2vec_list:
+                vec.append(word2vec_list[word])
             else:
-                vec.append(word2vec['</s>'])
+                vec.append(word2vec_list['</s>'])
         x_vec.append(vec)
     x_vec = np.array(x_vec)
     y_vec = np.array(labels)
     return [x_vec, y_vec]
 
 
-def load_data_with_word2vec(word2vec):
+def load_data_with_word2vec(word2vec_list):
     """
     Loads and preprocessed data for the MR dataset.
     Returns input vectors, labels, vocabulary, and inverse vocabulary.
@@ -148,7 +156,7 @@ def load_data_with_word2vec(word2vec):
     sentences, labels = load_data_and_labels()
     sentences_padded = pad_sentences(sentences)
     # vocabulary, vocabulary_inv = build_vocab(sentences_padded)
-    return build_input_data_with_word2vec(sentences_padded, labels, word2vec)
+    return build_input_data_with_word2vec(sentences_padded, labels, word2vec_list)
 
 
 def load_data():
@@ -182,18 +190,21 @@ def batch_iter(data, batch_size, num_epochs):
 
 
 def load_pretrained_word2vec(infile):
+    """
+    Load the pre-trained word2vec from file.
+    """
     if isinstance(infile, str):
         infile = open(infile)
 
-    word2vec = {}
+    word2vec_list = {}
     for idx, line in enumerate(infile):
         if idx == 0:
             vocab_size, dim = line.strip().split()
         else:
             tks = line.strip().split()
-            word2vec[tks[0]] = map(float, tks[1:])
+            word2vec_list[tks[0]] = map(float, tks[1:])
 
-    return word2vec
+    return word2vec_list
 
 
 def load_google_word2vec(path):
diff --git a/example/cnn_chinese_text_classification/text_cnn.py b/example/cnn_chinese_text_classification/text_cnn.py
index 4598a52e667..f98302aa182 100644
--- a/example/cnn_chinese_text_classification/text_cnn.py
+++ b/example/cnn_chinese_text_classification/text_cnn.py
@@ -20,12 +20,14 @@
 
 # -*- coding: utf-8 -*-
 
-import sys, os
-import mxnet as mx
-import numpy as np
-import argparse
+"""Implementing CNN + Highway Network for Chinese Text Classification in MXNet"""
+
+import os
+import sys
 import logging
-import time
+import argparse
+import numpy as np
+import mxnet as mx
 
 from mxnet import random
 from mxnet.initializer import Xavier, Initializer
@@ -63,12 +65,30 @@
 
 
 def save_model():
+    """
+    Save cnn model
+
+    Returns
+    ----------
+    callback: A callback function that can be passed as epoch_end_callback to fit
+    """
     if not os.path.exists("checkpoint"):
         os.mkdir("checkpoint")
     return mx.callback.do_checkpoint("checkpoint/checkpoint", args.save_period)
 
 
 def highway(data):
+    """
+    Construct highway net
+
+    Parameters
+    ----------
+    data:
+
+    Returns
+    ----------
+    Highway Networks
+    """
     _data = data
     high_weight = mx.sym.Variable('high_weight')
     high_bias = mx.sym.Variable('high_bias')
@@ -85,20 +105,42 @@ def highway(data):
 
 
 def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
+    """
+    Construct data iter
+
+    Parameters
+    ----------
+    batch_size: int
+    num_embed: int
+    pre_trained_word2vec: boolean
+                        identify the pre-trained layers or not
+    Returns
+    ----------
+    train_set: DataIter
+                Train DataIter
+    valid: DataIter
+                Valid DataIter
+    sentences_size: int
+                array dimensions
+    embedded_size: int
+                array dimensions
+    vocab_size: int
+                array dimensions
+    """
     logger.info('Loading data...')
     if pre_trained_word2vec:
         word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
         x, y = data_helpers.load_data_with_word2vec(word2vec)
-        # reshpae for convolution input
+        # reshape for convolution input
         x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
-        embed_size = x.shape[-1]
-        sentence_size = x.shape[2]
-        vocab_size = -1
+        embedded_size = x.shape[-1]
+        sentences_size = x.shape[2]
+        vocabulary_size = -1
     else:
         x, y, vocab, vocab_inv = data_helpers.load_data()
-        embed_size = num_embed
-        sentence_size = x.shape[1]
-        vocab_size = len(vocab)
+        embedded_size = num_embed
+        sentences_size = x.shape[1]
+        vocabulary_size = len(vocab)
 
     # randomly shuffle data
     np.random.seed(10)
@@ -109,30 +151,56 @@ def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
     # split train/valid set
     x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
     y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
-    logger.info('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
+    logger.info('Train/Valid split: %d/%d', len(y_train), len(y_dev))
     logger.info('train shape: %(shape)s', {'shape': x_train.shape})
     logger.info('valid shape: %(shape)s', {'shape': x_dev.shape})
-    logger.info('sentence max words: %(shape)s', {'shape': sentence_size})
-    logger.info('embedding size: %(msg)s', {'msg': embed_size})
-    logger.info('vocab size: %(msg)s', {'msg': vocab_size})
+    logger.info('sentence max words: %(shape)s', {'shape': sentences_size})
+    logger.info('embedding size: %(msg)s', {'msg': embedded_size})
+    logger.info('vocab size: %(msg)s', {'msg': vocabulary_size})
 
-    train = mx.io.NDArrayIter(
+    train_set = mx.io.NDArrayIter(
         x_train, y_train, batch_size, shuffle=True)
     valid = mx.io.NDArrayIter(
         x_dev, y_dev, batch_size)
-    return (train, valid, sentence_size, embed_size, vocab_size)
+    return train_set, valid, sentences_size, embedded_size, vocabulary_size
 
 
-def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
-            num_label=2, filter_list=[3, 4, 5], num_filter=100,
+def sym_gen(batch_size, sentences_size, num_embed, vocabulary_size,
+            num_label=2, filter_list=None, num_filter=100,
             dropout=0.0, pre_trained_word2vec=False):
+    """
+    Generate network symbol
+
+    Parameters
+    ----------
+    batch_size: int
+    sentences_size: int
+    num_embed: int
+    vocabulary_size: int
+    num_label: int
+    filter_list: list
+    num_filter: int
+    dropout: int
+    pre_trained_word2vec: boolean
+                        identify the pre-trained layers or not
+    Returns
+    ----------
+    sm: symbol
+    data: list of str
+        data names
+    softmax_label: list of str
+        label names
+    """
     input_x = mx.sym.Variable('data')
     input_y = mx.sym.Variable('softmax_label')
 
     # embedding layer
     if not pre_trained_word2vec:
-        embed_layer = mx.sym.Embedding(data=input_x, input_dim=vocab_size, output_dim=num_embed, name='vocab_embed')
-        conv_input = mx.sym.Reshape(data=embed_layer, target_shape=(batch_size, 1, sentence_size, num_embed))
+        embed_layer = mx.sym.Embedding(data=input_x,
+                                       input_dim=vocabulary_size,
+                                       output_dim=num_embed,
+                                       name='vocab_embed')
+        conv_input = mx.sym.Reshape(data=embed_layer, target_shape=(batch_size, 1, sentences_size, num_embed))
     else:
         conv_input = input_x
 
@@ -141,7 +209,7 @@ def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
     for i, filter_size in enumerate(filter_list):
         convi = mx.sym.Convolution(data=conv_input, kernel=(filter_size, num_embed), num_filter=num_filter)
         relui = mx.sym.Activation(data=convi, act_type='relu')
-        pooli = mx.sym.Pooling(data=relui, pool_type='max', kernel=(sentence_size - filter_size + 1, 1), stride=(1, 1))
+        pooli = mx.sym.Pooling(data=relui, pool_type='max', kernel=(sentences_size - filter_size + 1, 1), stride=(1, 1))
         pooled_outputs.append(pooli)
 
     # combine all pooled outputs
@@ -170,10 +238,28 @@ def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
     return sm, ('data',), ('softmax_label',)
 
 
-def train(symbol, train_iter, valid_iter, data_names, label_names):
-    devs = mx.cpu() if args.gpus is None or args.gpus is '' else [
-        mx.gpu(int(i)) for i in args.gpus.split(',')]
-    module = mx.mod.Module(symbol, data_names=data_names, label_names=label_names, context=devs)
+def train(symbol_data, train_iterator, valid_iterator, data_column_names, target_names):
+    """
+    Train cnn model
+
+    Parameters
+    ----------
+    symbol_data: symbol
+    train_iterator: DataIter
+                    Train DataIter
+    valid_iterator: DataIter
+                    Valid DataIter
+    data_column_names: list of str
+                       Defaults to ('data') for a typical model used in image classification
+    target_names: list of str
+                  Defaults to ('softmax_label') for a typical model used in image classification
+    """
+    devs = mx.cpu()  # default setting
+    if args.gpus is not None:
+        for i in args.gpus.split(','):
+            mx.gpu(int(i))
+        devs = mx.gpu()
+    module = mx.mod.Module(symbol_data, data_names=data_column_names, label_names=target_names, context=devs)
 
     init_params = {
         'vocab_embed_weight': {'uniform': 0.1},
@@ -185,7 +271,7 @@ def train(symbol, train_iter, valid_iter, data_names, label_names):
         'cls_weight': {'uniform': 0.1}, 'cls_bias': {'costant': 0},
     }
     # custom init_params
-    module.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
+    module.bind(data_shapes=train_iterator.provide_data, label_shapes=train_iterator.provide_label)
     module.init_params(CustomInit(init_params))
     lr_sch = mx.lr_scheduler.FactorScheduler(step=25000, factor=0.999)
     module.init_optimizer(
@@ -195,8 +281,8 @@ def norm_stat(d):
         return mx.nd.norm(d) / np.sqrt(d.size)
     mon = mx.mon.Monitor(25000, norm_stat)
 
-    module.fit(train_data=train_iter,
-               eval_data=valid_iter,
+    module.fit(train_data=train_iterator,
+               eval_data=valid_iterator,
                eval_metric='acc',
                kvstore=args.kv_store,
                monitor=mon,
diff --git a/example/cnn_text_classification/data_helpers.py b/example/cnn_text_classification/data_helpers.py
index b6fe1e6917a..948b95105c8 100644
--- a/example/cnn_text_classification/data_helpers.py
+++ b/example/cnn_text_classification/data_helpers.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+"""Help functions to support for implementing CNN + Highway Network for Text Classification in MXNet"""
+
 import itertools
 import os
 import re
@@ -40,9 +42,9 @@ def clean_str(string):
     string = re.sub(r"\'ll", " \'ll", string)
     string = re.sub(r",", " , ", string)
     string = re.sub(r"!", " ! ", string)
-    string = re.sub(r"\(", " \( ", string)
-    string = re.sub(r"\)", " \) ", string)
-    string = re.sub(r"\?", " \? ", string)
+    string = re.sub(r"\(", r" \( ", string)
+    string = re.sub(r"\)", r" \) ", string)
+    string = re.sub(r"\?", r" \? ", string)
     string = re.sub(r"\s{2,}", " ", string)
     return string.strip().lower()
 
@@ -81,8 +83,8 @@ def pad_sentences(sentences, padding_word="</s>"):
     """
     sequence_length = max(len(x) for x in sentences)
     padded_sentences = []
-    for i in range(len(sentences)):
-        sentence = sentences[i]
+    for i, sentence in enumerate(sentences):
+        print(i, sentence)
         num_padding = sequence_length - len(sentence)
         new_sentence = sentence + [padding_word] * num_padding
         padded_sentences.append(new_sentence)
@@ -111,23 +113,24 @@ def build_input_data(sentences, labels, vocabulary):
     y = np.array(labels)
     return [x, y]
 
-def build_input_data_with_word2vec(sentences, labels, word2vec):
+
+def build_input_data_with_word2vec(sentences, labels, word2vec_list):
     """Map sentences and labels to vectors based on a pretrained word2vec"""
     x_vec = []
     for sent in sentences:
         vec = []
         for word in sent:
-            if word in word2vec:
-                vec.append(word2vec[word])
+            if word in word2vec_list:
+                vec.append(word2vec_list[word])
             else:
-                vec.append(word2vec['</s>'])
+                vec.append(word2vec_list['</s>'])
         x_vec.append(vec)
     x_vec = np.array(x_vec)
     y_vec = np.array(labels)
     return [x_vec, y_vec]
 
 
-def load_data_with_word2vec(word2vec):
+def load_data_with_word2vec(word2vec_list):
     """
     Loads and preprocessed data for the MR dataset.
     Returns input vectors, labels, vocabulary, and inverse vocabulary.
@@ -136,7 +139,7 @@ def load_data_with_word2vec(word2vec):
     sentences, labels = load_data_and_labels()
     sentences_padded = pad_sentences(sentences)
     # vocabulary, vocabulary_inv = build_vocab(sentences_padded)
-    return build_input_data_with_word2vec(sentences_padded, labels, word2vec)
+    return build_input_data_with_word2vec(sentences_padded, labels, word2vec_list)
 
 
 def load_data():
@@ -170,18 +173,21 @@ def batch_iter(data, batch_size, num_epochs):
 
 
 def load_pretrained_word2vec(infile):
+    """
+    Load the pre-trained word2vec from file.
+    """
     if isinstance(infile, str):
         infile = open(infile)
 
-    word2vec = {}
+    word2vec_list = {}
     for idx, line in enumerate(infile):
         if idx == 0:
             vocab_size, dim = line.strip().split()
         else:
             tks = line.strip().split()
-            word2vec[tks[0]] = map(float, tks[1:])
+            word2vec_list[tks[0]] = map(float, tks[1:])
 
-    return word2vec
+    return word2vec_list
 
 
 def load_google_word2vec(path):


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services
