hetong007 closed pull request #12360: [MXNET-690] Add tests for initializers in
R
URL: https://github.com/apache/incubator-mxnet/pull/12360
This is a PR merged from a forked repository.
As GitHub hides the original diff of a foreign (forked) pull request once
it is merged, the diff is reproduced below for the sake of provenance:
diff --git a/R-package/tests/testthat/get_data.R
b/R-package/tests/testthat/get_data.R
index 2676b20fa80..0e27894498b 100644
--- a/R-package/tests/testthat/get_data.R
+++ b/R-package/tests/testthat/get_data.R
@@ -3,13 +3,11 @@ GetMNIST_ubyte <- function() {
if (!dir.exists("data")) {
dir.create("data/")
}
- if (!file.exists('data/train-images-idx3-ubyte') |
- !file.exists('data/train-labels-idx1-ubyte') |
- !file.exists('data/t10k-images-idx3-ubyte') |
- !file.exists('data/t10k-labels-idx1-ubyte')) {
- download.file('http://data.mxnet.io/mxnet/data/mnist.zip', destfile =
'data/mnist.zip')
- unzip('data/mnist.zip', exdir = 'data/')
- file.remove('data/mnist.zip')
+ if (!file.exists("data/train-images-idx3-ubyte") |
!file.exists("data/train-labels-idx1-ubyte") |
+ !file.exists("data/t10k-images-idx3-ubyte") |
!file.exists("data/t10k-labels-idx1-ubyte")) {
+ download.file("http://data.mxnet.io/mxnet/data/mnist.zip", destfile =
"data/mnist.zip")
+ unzip("data/mnist.zip", exdir = "data/")
+ file.remove("data/mnist.zip")
}
}
@@ -17,12 +15,11 @@ GetMNIST_csv <- function() {
if (!dir.exists("data")) {
dir.create("data/")
}
- if (!file.exists('data/train.csv') |
- !file.exists('data/test.csv')) {
-
download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/mnist_csv.zip',
- destfile = 'data/mnist_csv.zip')
- unzip('data/mnist_csv.zip', exdir = 'data/')
- file.remove('data/mnist_csv.zip')
+ if (!file.exists("data/train.csv") | !file.exists("data/test.csv")) {
+
download.file("https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/mnist_csv.zip",
+ destfile = "data/mnist_csv.zip")
+ unzip("data/mnist_csv.zip", exdir = "data/")
+ file.remove("data/mnist_csv.zip")
}
}
@@ -30,14 +27,11 @@ GetCifar10 <- function() {
if (!dir.exists("data")) {
dir.create("data/")
}
- if (!file.exists('data/cifar/train.rec') |
- !file.exists('data/cifar/test.rec') |
- !file.exists('data/cifar/train.lst') |
- !file.exists('data/cifar/test.lst')) {
- download.file('http://data.mxnet.io/mxnet/data/cifar10.zip',
- destfile = 'data/cifar10.zip')
- unzip('data/cifar10.zip', exdir = 'data/')
- file.remove('data/cifar10.zip')
+ if (!file.exists("data/cifar/train.rec") |
!file.exists("data/cifar/test.rec") |
+ !file.exists("data/cifar/train.lst") |
!file.exists("data/cifar/test.lst")) {
+ download.file("http://data.mxnet.io/mxnet/data/cifar10.zip", destfile =
"data/cifar10.zip")
+ unzip("data/cifar10.zip", exdir = "data/")
+ file.remove("data/cifar10.zip")
}
}
@@ -45,13 +39,13 @@ GetInception <- function() {
if (!dir.exists("model")) {
dir.create("model/")
}
- if (!file.exists('model/Inception-BN-0126.params')) {
-
download.file('http://data.dmlc.ml/models/imagenet/inception-bn/Inception-BN-0126.params',
- destfile = 'model/Inception-BN-0126.params')
+ if (!file.exists("model/Inception-BN-0126.params")) {
+
download.file("http://data.dmlc.ml/models/imagenet/inception-bn/Inception-BN-0126.params",
+ destfile = "model/Inception-BN-0126.params")
}
- if (!file.exists('model/Inception-BN-symbol.json')) {
-
download.file('http://data.dmlc.ml/models/imagenet/inception-bn/Inception-BN-symbol.json',
- destfile = 'model/Inception-BN-symbol.json')
+ if (!file.exists("model/Inception-BN-symbol.json")) {
+
download.file("http://data.dmlc.ml/models/imagenet/inception-bn/Inception-BN-symbol.json",
+ destfile = "model/Inception-BN-symbol.json")
}
}
@@ -59,12 +53,11 @@ GetCatDog <- function() {
if (!dir.exists("data")) {
dir.create("data/")
}
- if (!file.exists('data/cats_dogs/cats_dogs_train.rec') |
- !file.exists('data/cats_dogs/cats_dogs_val.rec')) {
-
download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/cats_dogs.zip',
- destfile = 'data/cats_dogs.zip')
- unzip('data/cats_dogs.zip', exdir = 'data/')
- file.remove('data/cats_dogs.zip')
+ if (!file.exists("data/cats_dogs/cats_dogs_train.rec") |
!file.exists("data/cats_dogs/cats_dogs_val.rec")) {
+
download.file("https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/cats_dogs.zip",
+ destfile = "data/cats_dogs.zip")
+ unzip("data/cats_dogs.zip", exdir = "data/")
+ file.remove("data/cats_dogs.zip")
}
}
@@ -72,11 +65,11 @@ GetMovieLens <- function() {
if (!dir.exists("data")) {
dir.create("data/")
}
- if (!file.exists('data/ml-100k/u.data')) {
- download.file('http://files.grouplens.org/datasets/movielens/ml-100k.zip',
- destfile = 'data/ml-100k.zip')
- unzip('data/ml-100k.zip', exdir = 'data/')
- file.remove('data/ml-100k.zip')
+ if (!file.exists("data/ml-100k/u.data")) {
+ download.file("http://files.grouplens.org/datasets/movielens/ml-100k.zip",
+ destfile = "data/ml-100k.zip")
+ unzip("data/ml-100k.zip", exdir = "data/")
+ file.remove("data/ml-100k.zip")
}
}
@@ -84,12 +77,11 @@ GetISBI_data <- function() {
if (!dir.exists("data")) {
dir.create("data/")
}
- if (!file.exists('data/ISBI/train-volume.tif') |
- !file.exists('data/ISBI/train-labels.tif')) {
-
download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/ISBI.zip',
- destfile = 'data/ISBI.zip')
- unzip('data/ISBI.zip', exdir = 'data/')
- file.remove('data/ISBI.zip')
+ if (!file.exists("data/ISBI/train-volume.tif") |
!file.exists("data/ISBI/train-labels.tif")) {
+
download.file("https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/ISBI.zip",
+ destfile = "data/ISBI.zip")
+ unzip("data/ISBI.zip", exdir = "data/")
+ file.remove("data/ISBI.zip")
}
}
@@ -97,11 +89,10 @@ GetCaptcha_data <- function() {
if (!dir.exists("data")) {
dir.create("data/")
}
- if (!file.exists('data/captcha_example/captcha_train.rec') |
- !file.exists('data/captcha_example/captcha_test.rec')) {
-
download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/captcha_example.zip',
- destfile = 'data/captcha_example.zip')
- unzip('data/captcha_example.zip', exdir = 'data/')
- file.remove('data/captcha_example.zip')
+ if (!file.exists("data/captcha_example/captcha_train.rec") |
!file.exists("data/captcha_example/captcha_test.rec")) {
+
download.file("https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/captcha_example.zip",
+ destfile = "data/captcha_example.zip")
+ unzip("data/captcha_example.zip", exdir = "data/")
+ file.remove("data/captcha_example.zip")
}
}
diff --git a/R-package/tests/testthat/test_img_seg.R
b/R-package/tests/testthat/test_img_seg.R
index b3400cd3bbc..9b63f5078fa 100644
--- a/R-package/tests/testthat/test_img_seg.R
+++ b/R-package/tests/testthat/test_img_seg.R
@@ -2,7 +2,8 @@ require(mxnet)
source("get_data.R")
-if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE"))
== 1) {
+if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE"))
==
+ 1) {
mx.ctx.default(new = mx.gpu())
message("Using GPU for testing.")
}
@@ -12,76 +13,89 @@ print_inferred_shape <- function(net) {
print(slist$out.shapes)
}
-convolution_module <- function(net, kernel_size, pad_size, filter_count,
- stride = c(1, 1), work_space = 2048, batch_norm
= TRUE,
- down_pool = FALSE, up_pool = FALSE, act_type =
"relu",
- convolution = TRUE) {
+convolution_module <- function(net, kernel_size, pad_size, filter_count,
stride = c(1,
+ 1), work_space = 2048, batch_norm = TRUE, down_pool = FALSE, up_pool =
FALSE,
+ act_type = "relu", convolution = TRUE) {
if (up_pool) {
- net = mx.symbol.Deconvolution(net, kernel = c(2, 2), pad = c(0, 0),
- stride = c(2, 2), num_filter = filter_count,
- workspace = work_space)
- net = mx.symbol.BatchNorm(net)
+ net <- mx.symbol.Deconvolution(net, kernel = c(2, 2), pad = c(0, 0),
stride = c(2,
+ 2), num_filter = filter_count, workspace = work_space)
+ net <- mx.symbol.BatchNorm(net)
if (act_type != "") {
- net = mx.symbol.Activation(net, act_type = act_type)
+ net <- mx.symbol.Activation(net, act_type = act_type)
}
}
if (convolution) {
- conv = mx.symbol.Convolution(data = net, kernel = kernel_size, stride =
stride,
- pad = pad_size, num_filter = filter_count,
- workspace = work_space)
- net = conv
+ conv <- mx.symbol.Convolution(data = net, kernel = kernel_size, stride =
stride,
+ pad = pad_size, num_filter = filter_count, workspace = work_space)
+ net <- conv
}
if (batch_norm) {
- net = mx.symbol.BatchNorm(net)
+ net <- mx.symbol.BatchNorm(net)
}
if (act_type != "") {
- net = mx.symbol.Activation(net, act_type = act_type)
+ net <- mx.symbol.Activation(net, act_type = act_type)
}
if (down_pool) {
- pool = mx.symbol.Pooling(net, pool_type = "max", kernel = c(2, 2), stride
= c(2, 2))
- net = pool
+ pool <- mx.symbol.Pooling(net, pool_type = "max", kernel = c(2, 2), stride
= c(2,
+ 2))
+ net <- pool
}
print_inferred_shape(net)
return(net)
}
get_unet <- function() {
- data = mx.symbol.Variable('data')
- kernel_size = c(3, 3)
- pad_size = c(1, 1)
- filter_count = 32
- pool1 = convolution_module(data, kernel_size, pad_size, filter_count =
filter_count, down_pool = TRUE)
- net = pool1
- pool2 = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 2, down_pool = TRUE)
- net = pool2
- pool3 = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 4, down_pool = TRUE)
- net = pool3
- pool4 = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 4, down_pool = TRUE)
- net = pool4
- net = mx.symbol.Dropout(net)
- pool5 = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 8, down_pool = TRUE)
- net = pool5
- net = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 4, up_pool = TRUE)
- net = convolution_module(net, kernel_size, pad_size = c(2, 2), filter_count
= filter_count * 4, up_pool = TRUE)
- net = mx.symbol.Crop(net, pool3, num.args = 2)
- net = mx.symbol.concat(c(pool3, net), num.args = 2)
- net = mx.symbol.Dropout(net)
- net = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 4)
- net = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 4, up_pool = TRUE)
+ data <- mx.symbol.Variable("data")
+ kernel_size <- c(3, 3)
+ pad_size <- c(1, 1)
+ filter_count <- 32
+ pool1 <- convolution_module(data, kernel_size, pad_size, filter_count =
filter_count,
+ down_pool = TRUE)
+ net <- pool1
+ pool2 <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 2, down_pool = TRUE)
+ net <- pool2
+ pool3 <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 4, down_pool = TRUE)
+ net <- pool3
+ pool4 <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 4, down_pool = TRUE)
+ net <- pool4
+ net <- mx.symbol.Dropout(net)
+ pool5 <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 8, down_pool = TRUE)
+ net <- pool5
+ net <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 4, up_pool = TRUE)
+ net <- convolution_module(net, kernel_size, pad_size = c(2, 2), filter_count
= filter_count *
+ 4, up_pool = TRUE)
+ net <- mx.symbol.Crop(net, pool3, num.args = 2)
+ net <- mx.symbol.concat(c(pool3, net), num.args = 2)
+ net <- mx.symbol.Dropout(net)
+ net <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 4)
+ net <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 4, up_pool = TRUE)
- net = mx.symbol.Concat(c(pool2, net), num.args = 2)
- net = mx.symbol.Dropout(net)
- net = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 4)
- net = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 4, up_pool = TRUE)
- convolution_module(net, kernel_size, pad_size, filter_count = filter_count *
4)
- net = mx.symbol.Concat(c(pool1, net), num.args = 2)
- net = mx.symbol.Dropout(net)
- net = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 2)
- net = convolution_module(net, kernel_size, pad_size, filter_count =
filter_count * 2, up_pool = TRUE)
- net = convolution_module(net, kernel_size, pad_size, filter_count = 1,
batch_norm = FALSE, act_type = "")
- net = mx.symbol.SoftmaxOutput(data = net, name = 'sm')
+ net <- mx.symbol.Concat(c(pool2, net), num.args = 2)
+ net <- mx.symbol.Dropout(net)
+ net <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 4)
+ net <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 4, up_pool = TRUE)
+ convolution_module(net, kernel_size, pad_size, filter_count = filter_count *
+ 4)
+ net <- mx.symbol.Concat(c(pool1, net), num.args = 2)
+ net <- mx.symbol.Dropout(net)
+ net <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 2)
+ net <- convolution_module(net, kernel_size, pad_size, filter_count =
filter_count *
+ 2, up_pool = TRUE)
+ net <- convolution_module(net, kernel_size, pad_size, filter_count = 1,
batch_norm = FALSE,
+ act_type = "")
+ net <- mx.symbol.SoftmaxOutput(data = net, name = "sm")
return(net)
}
@@ -89,47 +103,46 @@ context("Image segmentation")
test_that("UNET", {
list.of.packages <- c("imager")
- new.packages <- list.of.packages[!(list.of.packages %in%
installed.packages()[,"Package"])]
- if(length(new.packages)) install.packages(new.packages, repos =
"https://cloud.r-project.org/")
+ new.packages <- list.of.packages[!(list.of.packages %in%
installed.packages()[,
+ "Package"])]
+ if (length(new.packages))
+ install.packages(new.packages, repos = "https://cloud.r-project.org/")
GetISBI_data()
library(imager)
IMG_SIZE <- 168
files <- list.files(path = "data/ISBI/train-volume/")
- a = 'data/ISBI/train-volume/'
- filess = paste(a, files, sep = '')
- list_of_images = lapply(filess, function(x) {
+ a <- "data/ISBI/train-volume/"
+ filess <- paste(a, files, sep = "")
+ list_of_images <- lapply(filess, function(x) {
x <- load.image(x)
y <- resize(x, size_x = IMG_SIZE, size_y = IMG_SIZE)
})
- train.x = do.call('cbind', lapply(list_of_images, as.vector))
+ train.x <- do.call("cbind", lapply(list_of_images, as.vector))
train.array <- train.x
dim(train.array) <- c(IMG_SIZE, IMG_SIZE, 1, 30)
files <- list.files(path = "data/ISBI/train-labels")
- b = 'data/ISBI/train-labels/'
- filess = paste(b, files, sep = '')
- list_of_images = lapply(filess, function(x) {
+ b <- "data/ISBI/train-labels/"
+ filess <- paste(b, files, sep = "")
+ list_of_images <- lapply(filess, function(x) {
x <- load.image(x)
y <- resize(x, size_x = IMG_SIZE, size_y = IMG_SIZE)
})
- train.y = do.call('cbind', lapply(list_of_images, as.vector))
+ train.y <- do.call("cbind", lapply(list_of_images, as.vector))
- train.y[which(train.y < 0.5)] = 0
- train.y[which(train.y > 0.5)] = 1
- train.y.array = train.y
- dim(train.y.array) = c(IMG_SIZE, IMG_SIZE, 1, 30)
+ train.y[which(train.y < 0.5)] <- 0
+ train.y[which(train.y > 0.5)] <- 1
+ train.y.array <- train.y
+ dim(train.y.array) <- c(IMG_SIZE, IMG_SIZE, 1, 30)
devices <- mx.ctx.default()
mx.set.seed(0)
net <- get_unet()
- model <- mx.model.FeedForward.create(net, X = train.array, y = train.y.array,
- ctx = devices, num.round = 2,
- initializer = mx.init.normal(sqrt(2 /
576)),
- learning.rate = 0.05,
- momentum = 0.99,
- array.batch.size = 2)
+ model <- mx.model.FeedForward.create(net, X = train.array, y =
train.y.array,
+ ctx = devices, num.round = 2, initializer = mx.init.normal(sqrt(2/576)),
+ learning.rate = 0.05, momentum = 0.99, array.batch.size = 2)
})
diff --git a/R-package/tests/testthat/test_initializer.R
b/R-package/tests/testthat/test_initializer.R
new file mode 100644
index 00000000000..c005244d02b
--- /dev/null
+++ b/R-package/tests/testthat/test_initializer.R
@@ -0,0 +1,114 @@
+require(mxnet)
+
+context("initializer")
+
+testthat("mx.init.uniform", {
+ uniform_init <- mx.init.uniform(scale = 1)
+ expect_equal(typeof(uniform_init), "closure")
+
+ X_bias <- uniform_init("X_bias", c(1, 10000), ctx = mx.ctx.default())
+ expect_equal(X_bias, mx.nd.zeros(c(1, 10000)))
+
+ X_weight <- uniform_init("X_weight", c(5, 10, 10000), ctx = mx.ctx.default())
+ expect_equal(X_weight >= -1, mx.nd.ones(c(5, 10, 10000)))
+ expect_equal(X_weight <= 1, mx.nd.ones(c(5, 10, 10000)))
+ mean_weight <- mean(as.array(X_weight))
+ expect_equal(mean_weight, 0, tolerance = 0.01)
+})
+
+testthat("mx.init.normal", {
+ normal_init <- mx.init.normal(sd = 0.1)
+ expect_equal(typeof(normal_init), "closure")
+
+ X_bias <- normal_init("X_bias", c(1, 10000), ctx = mx.ctx.default())
+ expect_equal(X_bias, mx.nd.zeros(c(1, 10000)))
+
+ X_weight <- normal_init("X_weight", c(5, 10, 10000), ctx = mx.ctx.default())
+ weight_mean <- mean(as.array(X_weight))
+ weight_sd <- sd(as.array(X_weight))
+ expect_equal(weight_mean, 0, tolerance = 0.01)
+ expect_equal(weight_sd, 0.1, tolerance = 0.01)
+})
+
+testthat("mx.init.Xavier", {
+ xavier_init <- mx.init.Xavier()
+ expect_equal(typeof(xavier_init), "closure")
+
+ # default parameters
+ shape <- c(2, 3, 324, 324)
+ fan_out <- shape[length(shape)]
+ fan_in <- prod(shape[-length(shape)])
+
+ X_bias <- xavier_init("X_bias", shape = shape, ctx = mx.ctx.default())
+ expect_equal(X_bias, mx.nd.zeros(shape))
+
+ X_weight <- xavier_init("X_weight", shape = shape, ctx = mx.ctx.default())
+ scale <- sqrt(3/((fan_in + fan_out)/2))
+ expect_equal(X_weight >= -scale, mx.nd.ones(shape))
+ expect_equal(X_weight <= scale, mx.nd.ones(shape))
+ weight_mean <- mean(as.array(X_weight))
+ expect_equal(weight_mean, 0, tolerance = 0.01)
+
+ for (dist_type in c("gaussian", "uniform")) {
+ for (factor_type in c("in", "out", "avg")) {
+ xavier_init <- mx.init.Xavier(rnd_type = dist_type, factor_type =
factor_type,
+ magnitude = 200)
+ expect_equal(typeof(xavier_init), "closure")
+
+ X_weight <- xavier_init("X_weight", shape = shape, ctx =
mx.ctx.default())
+ factor_val <- switch(factor_type, avg = (fan_in + fan_out)/2, `in` =
fan_in,
+ out = fan_out)
+ scale <- sqrt(200/factor_val)
+
+ if (dist_type == "gaussian") {
+ weight_mean <- mean(as.array(X_weight))
+ weight_sd <- sd(as.array(X_weight))
+ expect_equal(weight_mean, 0, tolerance = 0.01)
+ expect_equal(weight_sd, scale, tolerance = 0.01)
+ } else {
+ expect_equal(X_weight >= -scale, mx.nd.ones(shape))
+ expect_equal(X_weight <= scale, mx.nd.ones(shape))
+ weight_mean <- mean(as.array(X_weight))
+ expect_equal(weight_mean, 0, tolerance = 0.01)
+ }
+ }
+ }
+})
+
+testthat("mx.init.internal.default", {
+ sample_bias <- mxnet:::mx.init.internal.default("X_bias", c(5, 10, 100), ctx
= mx.ctx.default())
+ expect_equal(sample_bias, mx.nd.zeros(c(5, 10, 100)))
+
+ sample_gamma <- mxnet:::mx.init.internal.default("X_gamma", c(5, 10, 100),
ctx = mx.ctx.default())
+ expect_equal(sample_gamma, mx.nd.ones(c(5, 10, 100)))
+
+ sample_beta <- mxnet:::mx.init.internal.default("X_beta", c(5, 10, 100), ctx
= mx.ctx.default())
+ expect_equal(sample_beta, mx.nd.zeros(c(5, 10, 100)))
+
+ sample_moving_mean <- mxnet:::mx.init.internal.default("X_moving_mean", c(5,
+ 10, 100), ctx = mx.ctx.default())
+ expect_equal(sample_moving_mean, mx.nd.zeros(c(5, 10, 100)))
+
+ sample_moving_var <- mxnet:::mx.init.internal.default("X_moving_var", c(5,
10,
+ 100), ctx = mx.ctx.default())
+ expect_equal(sample_moving_var, mx.nd.ones(c(5, 10, 100)))
+
+ expect_error(mxnet:::mx.init.internal.default("X", c(5, 10, 100), ctx =
mx.ctx.default()),
+ "Unkown initialization pattern for X")
+})
+
+testthat("mx.init.create", {
+ uniform_init <- mx.init.uniform(scale = 1)
+ expect_equal(typeof(uniform_init), "closure")
+ arrs <- setNames(as.list(c(50000, 100)), c("X_weight", "X_bias"))
+ arr_init <- mx.init.create(uniform_init, arrs, ctx = mx.ctx.default())
+
+ X_bias <- arr_init$X_bias
+ expect_equal(X_bias, mx.nd.zeros(c(100)))
+
+ X_weight <- arr_init$X_weight
+ expect_equal(X_weight >= -1, mx.nd.ones(c(50000)))
+ expect_equal(X_weight <= 1, mx.nd.ones(c(50000)))
+ mean_weight <- mean(as.array(X_weight))
+ expect_equal(mean_weight, 0, tolerance = 0.01)
+})
diff --git a/R-package/tests/testthat/test_io.R
b/R-package/tests/testthat/test_io.R
index d619856cbb9..32f6c58d3cb 100644
--- a/R-package/tests/testthat/test_io.R
+++ b/R-package/tests/testthat/test_io.R
@@ -7,22 +7,15 @@ source("get_data.R")
test_that("MNISTIter", {
GetMNIST_ubyte()
batch.size <- 100
- train_dataiter <- mx.io.MNISTIter(
- image = "data/train-images-idx3-ubyte",
- label = "data/train-labels-idx1-ubyte",
- data.shape = c(784),
- batch.size = batch.size,
- shuffle = TRUE,
- flat = TRUE,
- silent = 0,
- seed = 10
- )
+ train_dataiter <- mx.io.MNISTIter(image = "data/train-images-idx3-ubyte",
label = "data/train-labels-idx1-ubyte",
+ data.shape = c(784), batch.size = batch.size, shuffle = TRUE, flat = TRUE,
+ silent = 0, seed = 10)
train_dataiter$reset()
- batch_count = 0
+ batch_count <- 0
while (train_dataiter$iter.next()) {
- batch_count = batch_count + 1
+ batch_count <- batch_count + 1
}
- nbatch = 60000 / batch.size
+ nbatch <- 60000/batch.size
expect_equal(batch_count, nbatch)
train_dataiter$reset()
train_dataiter$iter.next()
@@ -39,21 +32,15 @@ test_that("MNISTIter", {
test_that("Cifar10Rec", {
GetCifar10()
- dataiter <- mx.io.ImageRecordIter(
- path.imgrec = "./data/cifar/train.rec",
- path.imglist = "./data/cifar/train.lst",
- mean.img = "./data/cifar/cifar10_mean.bin",
- batch.size = 100,
- data.shape = c(28, 28, 3),
- rand.crop = TRUE,
- rand.mirror = TRUE
- )
- labelcount = rep(0, 10)
+ dataiter <- mx.io.ImageRecordIter(path.imgrec = "./data/cifar/train.rec",
path.imglist = "./data/cifar/train.lst",
+ mean.img = "./data/cifar/cifar10_mean.bin", batch.size = 100, data.shape =
c(28,
+ 28, 3), rand.crop = TRUE, rand.mirror = TRUE)
+ labelcount <- rep(0, 10)
dataiter$reset()
while (dataiter$iter.next()) {
- label = as.array(dataiter$value()$label)
+ label <- as.array(dataiter$value()$label)
for (i in label) {
- labelcount[i + 1] = labelcount[i + 1] + 1
+ labelcount[i + 1] <- labelcount[i + 1] + 1
}
}
@@ -65,20 +52,20 @@ test_that("mx.io.arrayiter", {
y <- c(1:100)
dataiter <- mx.io.arrayiter(X, y, batch.size = 20, shuffle = FALSE)
dataiter$reset()
- batch_count = 0
+ batch_count <- 0
while (dataiter$iter.next()) {
- batch_count = batch_count + 1
+ batch_count <- batch_count + 1
}
- expect_equal(batch_count, 100 / 20)
+ expect_equal(batch_count, 100/20)
- y <- round(y / 10)
+ y <- round(y/10)
dataiter <- mx.io.arrayiter(X, y, batch.size = 30, shuffle = FALSE)
labelcount <- rep(0, 11)
dataiter$reset()
while (dataiter$iter.next()) {
label <- as.array(dataiter$value()$label)
for (i in label) {
- labelcount[i + 1] = labelcount[i + 1] + 1
+ labelcount[i + 1] <- labelcount[i + 1] + 1
}
}
diff --git a/R-package/tests/testthat/test_model.R
b/R-package/tests/testthat/test_model.R
index 6167ed66c41..f4be49d5fdd 100644
--- a/R-package/tests/testthat/test_model.R
+++ b/R-package/tests/testthat/test_model.R
@@ -4,76 +4,64 @@ source("get_data.R")
context("models")
-if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE"))
== 1) {
+if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE"))
==
+ 1) {
mx.ctx.default(new = mx.gpu())
message("Using GPU for testing.")
}
test_that("MNIST", {
-# # Network configuration
- GetMNIST_ubyte()
- batch.size <- 100
- data <- mx.symbol.Variable("data")
- fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128)
- act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu")
- fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64)
- act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu")
- fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10)
- softmax <- mx.symbol.Softmax(fc3, name = "sm")
-
- dtrain = mx.io.MNISTIter(
- image="data/train-images-idx3-ubyte",
- label="data/train-labels-idx1-ubyte",
- data.shape=c(784),
- batch.size=batch.size,
- shuffle=TRUE,
- flat=TRUE,
- silent=0,
- seed=10)
-
- dtest = mx.io.MNISTIter(
- image="data/t10k-images-idx3-ubyte",
- label="data/t10k-labels-idx1-ubyte",
- data.shape=c(784),
- batch.size=batch.size,
- shuffle=FALSE,
- flat=TRUE,
- silent=0)
-
- mx.set.seed(0)
-
- # create the model
- model <- mx.model.FeedForward.create(softmax, X=dtrain, eval.data=dtest,
- ctx = mx.ctx.default(), num.round=1,
- learning.rate=0.1, momentum=0.9,
- initializer=mx.init.uniform(0.07),
-
epoch.end.callback=mx.callback.save.checkpoint("chkpt"),
-
batch.end.callback=mx.callback.log.train.metric(100))
-
- # do prediction
- pred <- predict(model, dtest)
- label <- mx.io.extract(dtest, "label")
- dataX <- mx.io.extract(dtest, "data")
- # Predict with R's array
- pred2 <- predict(model, X=dataX)
-
- accuracy <- function(label, pred) {
- ypred = max.col(t(as.array(pred)))
- return(sum((as.array(label) + 1) == ypred) / length(label))
- }
-
- expect_equal(accuracy(label, pred), accuracy(label, pred2))
-
- file.remove("chkpt-0001.params")
- file.remove("chkpt-symbol.json")
+ # # Network configuration
+ GetMNIST_ubyte()
+ batch.size <- 100
+ data <- mx.symbol.Variable("data")
+ fc1 <- mx.symbol.FullyConnected(data, name = "fc1", num_hidden = 128)
+ act1 <- mx.symbol.Activation(fc1, name = "relu1", act_type = "relu")
+ fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64)
+ act2 <- mx.symbol.Activation(fc2, name = "relu2", act_type = "relu")
+ fc3 <- mx.symbol.FullyConnected(act2, name = "fc3", num_hidden = 10)
+ softmax <- mx.symbol.Softmax(fc3, name = "sm")
+
+ dtrain <- mx.io.MNISTIter(image = "data/train-images-idx3-ubyte", label =
"data/train-labels-idx1-ubyte",
+ data.shape = c(784), batch.size = batch.size, shuffle = TRUE, flat = TRUE,
+ silent = 0, seed = 10)
+
+ dtest <- mx.io.MNISTIter(image = "data/t10k-images-idx3-ubyte", label =
"data/t10k-labels-idx1-ubyte",
+ data.shape = c(784), batch.size = batch.size, shuffle = FALSE, flat =
TRUE,
+ silent = 0)
+
+ mx.set.seed(0)
+
+ # create the model
+ model <- mx.model.FeedForward.create(softmax, X = dtrain, eval.data = dtest,
+ ctx = mx.ctx.default(), num.round = 1, learning.rate = 0.1, momentum =
0.9,
+ initializer = mx.init.uniform(0.07), epoch.end.callback =
mx.callback.save.checkpoint("chkpt"),
+ batch.end.callback = mx.callback.log.train.metric(100))
+
+ # do prediction
+ pred <- predict(model, dtest)
+ label <- mx.io.extract(dtest, "label")
+ dataX <- mx.io.extract(dtest, "data")
+ # Predict with R's array
+ pred2 <- predict(model, X = dataX)
+
+ accuracy <- function(label, pred) {
+ ypred <- max.col(t(as.array(pred)))
+ return(sum((as.array(label) + 1) == ypred)/length(label))
+ }
+
+ expect_equal(accuracy(label, pred), accuracy(label, pred2))
+
+ file.remove("chkpt-0001.params")
+ file.remove("chkpt-symbol.json")
})
test_that("Regression", {
data(BostonHousing, package = "mlbench")
train.ind <- seq(1, 506, 3)
- train.x <- data.matrix(BostonHousing[train.ind,-14])
+ train.x <- data.matrix(BostonHousing[train.ind, -14])
train.y <- BostonHousing[train.ind, 14]
- test.x <- data.matrix(BostonHousing[-train.ind,-14])
+ test.x <- data.matrix(BostonHousing[-train.ind, -14])
test.y <- BostonHousing[-train.ind, 14]
data <- mx.symbol.Variable("data")
fc1 <- mx.symbol.FullyConnected(data, num_hidden = 1)
@@ -81,16 +69,13 @@ test_that("Regression", {
demo.metric.mae <- mx.metric.custom("mae", function(label, pred) {
pred <- mx.nd.reshape(pred, shape = 0)
- res <- mx.nd.mean(mx.nd.abs(label-pred))
+ res <- mx.nd.mean(mx.nd.abs(label - pred))
return(as.array(res))
})
mx.set.seed(0)
- model <- mx.model.FeedForward.create(lro, X = train.x, y = train.y,
- ctx = mx.ctx.default(), num.round = 5,
- array.batch.size = 20,
- learning.rate = 2e-6,
- momentum = 0.9,
- eval.metric = demo.metric.mae)
+ model <- mx.model.FeedForward.create(lro, X = train.x, y = train.y, ctx =
mx.ctx.default(),
+ num.round = 5, array.batch.size = 20, learning.rate = 2e-06, momentum =
0.9,
+ eval.metric = demo.metric.mae)
train.x <- data.matrix(BostonHousing[train.ind, -(13:14)])
train.y <- BostonHousing[train.ind, c(13:14)]
@@ -98,18 +83,14 @@ test_that("Regression", {
test.y <- BostonHousing[-train.ind, c(13:14)]
data <- mx.symbol.Variable("data")
- fc2 <- mx.symbol.FullyConnected(data, num_hidden=2)
+ fc2 <- mx.symbol.FullyConnected(data, num_hidden = 2)
lro2 <- mx.symbol.LinearRegressionOutput(fc2)
mx.set.seed(0)
- train_iter = mx.io.arrayiter(data = t(train.x), label = t(train.y))
-
- model <- mx.model.FeedForward.create(lro2, X = train_iter,
- ctx = mx.ctx.default(),
- num.round = 50,
- array.batch.size = 20,
- learning.rate = 2e-6,
- momentum = 0.9)
+ train_iter <- mx.io.arrayiter(data = t(train.x), label = t(train.y))
+
+ model <- mx.model.FeedForward.create(lro2, X = train_iter, ctx =
mx.ctx.default(),
+ num.round = 50, array.batch.size = 20, learning.rate = 2e-06, momentum =
0.9)
})
@@ -122,23 +103,18 @@ test_that("Classification", {
test.x <- data.matrix(Sonar[-train.ind, 1:60])
test.y <- Sonar[-train.ind, 61]
mx.set.seed(0)
- model <- mx.mlp(train.x, train.y, hidden_node = 10,
- out_node = 2, out_activation = "softmax",
- num.round = 5, array.batch.size = 15,
- learning.rate = 0.07,
- momentum = 0.9,
- eval.metric = mx.metric.accuracy)
+ model <- mx.mlp(train.x, train.y, hidden_node = 10, out_node = 2,
out_activation = "softmax",
+ num.round = 5, array.batch.size = 15, learning.rate = 0.07, momentum =
0.9,
+ eval.metric = mx.metric.accuracy)
})
test_that("Fine-tune", {
GetInception()
GetCatDog()
- train_iter <- mx.io.ImageRecordIter(path.imgrec =
"./data/cats_dogs/cats_dogs_train.rec",
- batch.size = 8, data.shape = c(224,
224, 3),
- rand.crop = TRUE, rand.mirror = TRUE)
- val_iter <- mx.io.ImageRecordIter(path.imgrec =
"./data/cats_dogs/cats_dogs_val.rec",
- batch.size = 8, data.shape = c(224, 224,
3),
- rand.crop = FALSE, rand.mirror = FALSE)
+ train_iter <- mx.io.ImageRecordIter(path.imgrec =
"./data/cats_dogs/cats_dogs_train.rec",
+ batch.size = 8, data.shape = c(224, 224, 3), rand.crop = TRUE, rand.mirror
= TRUE)
+ val_iter <- mx.io.ImageRecordIter(path.imgrec =
"./data/cats_dogs/cats_dogs_val.rec",
+ batch.size = 8, data.shape = c(224, 224, 3), rand.crop = FALSE,
rand.mirror = FALSE)
inception_bn <- mx.model.load("./model/Inception-BN", iteration = 126)
symbol <- inception_bn$symbol
internals <- symbol$get.internals()
@@ -148,11 +124,8 @@ test_that("Fine-tune", {
new_fc <- mx.symbol.FullyConnected(data = flatten, num_hidden = 2, name =
"fc1")
new_soft <- mx.symbol.SoftmaxOutput(data = new_fc, name = "softmax")
- arg_params_new <- mx.model.init.params(symbol = new_soft,
- input.shape = list("data" = c(224,
224, 3, 8)),
- output.shape = NULL,
- initializer = mx.init.uniform(0.1),
- ctx = mx.cpu())$arg.params
+ arg_params_new <- mx.model.init.params(symbol = new_soft, input.shape =
list(data = c(224,
+ 224, 3, 8)), output.shape = NULL, initializer = mx.init.uniform(0.1), ctx
= mx.cpu())$arg.params
fc1_weights_new <- arg_params_new[["fc1_weight"]]
fc1_bias_new <- arg_params_new[["fc1_bias"]]
@@ -160,25 +133,22 @@ test_that("Fine-tune", {
arg_params_new[["fc1_weight"]] <- fc1_weights_new
arg_params_new[["fc1_bias"]] <- fc1_bias_new
-
- #model <- mx.model.FeedForward.create(symbol = new_soft, X = train_iter,
eval.data = val_iter,
- # ctx = mx.ctx.default(), eval.metric =
mx.metric.accuracy,
- # num.round = 2, learning.rate = 0.05,
momentum = 0.9,
- # wd = 0.00001, kvstore = "local",
- # batch.end.callback =
mx.callback.log.train.metric(50),
- # initializer =
mx.init.Xavier(factor_type = "in", magnitude = 2.34),
- # optimizer = "sgd",
- # arg.params = arg_params_new,
- # aux.params = inception_bn$aux.params)
-})
+
+ # model <- mx.model.FeedForward.create(symbol = new_soft, X = train_iter,
+ # eval.data = val_iter, ctx = mx.ctx.default(), eval.metric =
mx.metric.accuracy,
+ # num.round = 2, learning.rate = 0.05, momentum = 0.9, wd = 0.00001, kvstore
=
+ # 'local', batch.end.callback = mx.callback.log.train.metric(50),
initializer =
+ # mx.init.Xavier(factor_type = 'in', magnitude = 2.34), optimizer = 'sgd',
+ # arg.params = arg_params_new, aux.params = inception_bn$aux.params)
+})
test_that("Matrix Factorization", {
# Use fake random data instead of GetMovieLens() to remove external
dependency
set.seed(123)
- user <- sample(943, size = 100000, replace = T)
- item <- sample(1682, size = 100000, replace = T)
- score <- sample(5, size = 100000, replace = T)
+ user <- sample(943, size = 1e+05, replace = T)
+ item <- sample(1682, size = 1e+05, replace = T)
+ score <- sample(5, size = 1e+05, replace = T)
DF <- data.frame(user, item, score)
max_user <- max(DF$user)
@@ -189,95 +159,74 @@ test_that("Matrix Factorization", {
user <- mx.symbol.Variable("user")
item <- mx.symbol.Variable("item")
score <- mx.symbol.Variable("score")
- user1 <- mx.symbol.Embedding(data = mx.symbol.BlockGrad(user), input_dim =
max_user,
- output_dim = k, name = "user1")
- item1 <- mx.symbol.Embedding(data = mx.symbol.BlockGrad(item), input_dim =
max_item,
- output_dim = k, name = "item1")
+ user1 <- mx.symbol.Embedding(data = mx.symbol.BlockGrad(user), input_dim =
max_user,
+ output_dim = k, name = "user1")
+ item1 <- mx.symbol.Embedding(data = mx.symbol.BlockGrad(item), input_dim =
max_item,
+ output_dim = k, name = "item1")
pred <- user1 * item1
pred1 <- mx.symbol.sum_axis(pred, axis = 1, name = "pred1")
pred2 <- mx.symbol.Flatten(pred1, name = "pred2")
pred3 <- mx.symbol.LinearRegressionOutput(data = pred2, label = score, name
= "pred3")
-
+
mx.set.seed(123)
- CustomIter <- setRefClass( "CustomIter", fields = c("iter1", "iter2"),
- contains = "Rcpp_MXArrayDataIter",
- methods = list(
- initialize = function(iter1, iter2) {
- .self$iter1 <- iter1
- .self$iter2 <- iter2
- .self
- },
- value = function() {
- user <- .self$iter1$value()$data
- item <- .self$iter2$value()$data
- score <- .self$iter1$value()$label
- list(user = user,
- item = item,
- score = score)
- },
- iter.next = function() {
- .self$iter1$iter.next()
- .self$iter2$iter.next()
- },
- reset = function() {
- .self$iter1$reset()
- .self$iter2$reset()
- },
- num.pad = function() {
- .self$iter1$num.pad()
- },
- finalize = function() {
- .self$iter1$finalize()
- .self$iter2$finalize()
- }
- )
- )
-
- user_iter = mx.io.arrayiter(data = DF[, 1], label = DF[, 3], batch.size = k)
-
- item_iter = mx.io.arrayiter(data = DF[, 2], label = DF[, 3], batch.size = k)
+ CustomIter <- setRefClass("CustomIter", fields = c("iter1", "iter2"),
contains = "Rcpp_MXArrayDataIter",
+ methods = list(initialize = function(iter1, iter2) {
+ .self$iter1 <- iter1
+ .self$iter2 <- iter2
+ .self
+ }, value = function() {
+ user <- .self$iter1$value()$data
+ item <- .self$iter2$value()$data
+ score <- .self$iter1$value()$label
+ list(user = user, item = item, score = score)
+ }, iter.next = function() {
+ .self$iter1$iter.next()
+ .self$iter2$iter.next()
+ }, reset = function() {
+ .self$iter1$reset()
+ .self$iter2$reset()
+ }, num.pad = function() {
+ .self$iter1$num.pad()
+ }, finalize = function() {
+ .self$iter1$finalize()
+ .self$iter2$finalize()
+ }))
+
+ user_iter <- mx.io.arrayiter(data = DF[, 1], label = DF[, 3], batch.size = k)
+
+ item_iter <- mx.io.arrayiter(data = DF[, 2], label = DF[, 3], batch.size = k)
train_iter <- CustomIter$new(user_iter, item_iter)
- model <- mx.model.FeedForward.create(pred3, X = train_iter, ctx =
mx.ctx.default(),
- num.round = 5, initializer =
mx.init.uniform(0.07),
- learning.rate = 0.07,
- eval.metric = mx.metric.rmse,
- momentum = 0.9,
- epoch.end.callback =
mx.callback.log.train.metric(1),
- input.names = c("user", "item"),
- output.names = "score")
+ model <- mx.model.FeedForward.create(pred3, X = train_iter, ctx =
mx.ctx.default(),
+ num.round = 5, initializer = mx.init.uniform(0.07), learning.rate = 0.07,
+ eval.metric = mx.metric.rmse, momentum = 0.9, epoch.end.callback =
mx.callback.log.train.metric(1),
+ input.names = c("user", "item"), output.names = "score")
})
test_that("Captcha", {
GetCaptcha_data()
data.shape <- c(80, 30, 3)
batch_size <- 40
- train <- mx.io.ImageRecordIter(
- path.imgrec = "./data/captcha_example/captcha_train.rec",
- path.imglist = "./data/captcha_example/captcha_train.lst",
- batch.size = batch_size,
- label.width = 4,
- data.shape = data.shape,
- mean.img = "mean.bin")
-
- val <- mx.io.ImageRecordIter(
- path.imgrec = "./data/captcha_example/captcha_test.rec",
- path.imglist = "./data/captcha_example/captcha_test.lst",
- batch.size = batch_size,
- label.width = 4,
- data.shape = data.shape,
- mean.img = "mean.bin")
+ train <- mx.io.ImageRecordIter(path.imgrec =
"./data/captcha_example/captcha_train.rec",
+ path.imglist = "./data/captcha_example/captcha_train.lst", batch.size =
batch_size,
+ label.width = 4, data.shape = data.shape, mean.img = "mean.bin")
+
+ val <- mx.io.ImageRecordIter(path.imgrec =
"./data/captcha_example/captcha_test.rec",
+ path.imglist = "./data/captcha_example/captcha_test.lst", batch.size =
batch_size,
+ label.width = 4, data.shape = data.shape, mean.img = "mean.bin")
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
conv1 <- mx.symbol.Convolution(data = data, kernel = c(5, 5), num_filter =
32)
- pool1 <- mx.symbol.Pooling(data = conv1, pool_type = "max", kernel = c(2,
2), stride = c(1, 1))
+ pool1 <- mx.symbol.Pooling(data = conv1, pool_type = "max", kernel = c(2,
2),
+ stride = c(1, 1))
relu1 <- mx.symbol.Activation(data = pool1, act_type = "relu")
conv2 <- mx.symbol.Convolution(data = relu1, kernel = c(5, 5), num_filter =
32)
- pool2 <- mx.symbol.Pooling(data = conv2, pool_type = "avg", kernel = c(2,
2), stride = c(1, 1))
+ pool2 <- mx.symbol.Pooling(data = conv2, pool_type = "avg", kernel = c(2,
2),
+ stride = c(1, 1))
relu2 <- mx.symbol.Activation(data = pool2, act_type = "relu")
flatten <- mx.symbol.Flatten(data = relu2)
@@ -292,8 +241,8 @@ test_that("Captcha", {
captcha_net <- mx.symbol.SoftmaxOutput(data = fc2, label = label, name =
"softmax")
mx.metric.acc2 <- mx.metric.custom("accuracy", function(label, pred) {
- label = as.array(label)
- pred = as.array(pred)
+ label <- as.array(label)
+ pred <- as.array(pred)
ypred <- max.col(t(pred)) - 1
ypred <- matrix(ypred, nrow = nrow(label), ncol = ncol(label), byrow =
TRUE)
return(sum(colSums(label == ypred) == 4)/ncol(label))
@@ -305,26 +254,20 @@ test_that("Captcha", {
train$iter.next()
input.names <- "data"
- input.shape <- sapply(input.names, function(n){dim(train$value()[[n]])},
simplify = FALSE)
+ input.shape <- sapply(input.names, function(n) {
+ dim(train$value()[[n]])
+ }, simplify = FALSE)
arg_names <- arguments(captcha_net)
output.names <- "label"
- output.shape <- sapply(output.names, function(n){dim(train$value()[[n]])},
simplify = FALSE)
- params <- mx.model.init.params(captcha_net, input.shape, output.shape,
- mx.init.Xavier(factor_type = "in", magnitude
= 2.34),
- mx.cpu())
-
- #model <- mx.model.FeedForward.create(
- # X = train,
- # eval.data = val,
- # ctx = mx.ctx.default(),
- # symbol = captcha_net,
- # eval.metric = mx.metric.acc2,
- # num.round = 1,
- # learning.rate = 1e-04,
- # momentum = 0.9,
- # wd = 1e-05,
- # batch.end.callback = mx.callback.log.train.metric(50),
- # initializer = mx.init.Xavier(factor_type = "in", magnitude = 2.34),
- # optimizer = "sgd",
- # clip_gradient = 10)
+ output.shape <- sapply(output.names, function(n) {
+ dim(train$value()[[n]])
+ }, simplify = FALSE)
+ params <- mx.model.init.params(captcha_net, input.shape, output.shape,
mx.init.Xavier(factor_type = "in",
+ magnitude = 2.34), mx.cpu())
+
+ # model <- mx.model.FeedForward.create( X = train, eval.data = val, ctx =
+ # mx.ctx.default(), symbol = captcha_net, eval.metric = mx.metric.acc2,
num.round
+ # = 1, learning.rate = 1e-04, momentum = 0.9, wd = 1e-05, batch.end.callback
=
+ # mx.callback.log.train.metric(50), initializer = mx.init.Xavier(factor_type
=
+ # 'in', magnitude = 2.34), optimizer = 'sgd', clip_gradient = 10)
})
diff --git a/R-package/tests/testthat/test_ndarray.R
b/R-package/tests/testthat/test_ndarray.R
index 326ea6ca7f3..4850823e29d 100644
--- a/R-package/tests/testthat/test_ndarray.R
+++ b/R-package/tests/testthat/test_ndarray.R
@@ -2,45 +2,46 @@ require(mxnet)
context("ndarray")
-if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE"))
== 1) {
+if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE"))
==
+ 1) {
mx.ctx.default(new = mx.gpu())
message("Using GPU for testing.")
}
test_that("element-wise calculation for vector", {
- x = 1:10
- mat = mx.nd.array(as.array(x), mx.ctx.default())
+ x <- 1:10
+ mat <- mx.nd.array(as.array(x), mx.ctx.default())
expect_equal(x, as.array(mat))
expect_equal(x + 1, as.array(mat + 1))
expect_equal(x - 10, as.array(mat - 10))
expect_equal(x * 20, as.array(mat * 20))
- expect_equal(x / 3, as.array(mat / 3), tolerance = 1e-5)
+ expect_equal(x/3, as.array(mat/3), tolerance = 1e-05)
expect_equal(-1 - x, as.array(-1 - mat))
- expect_equal(-5 / x, as.array(-5 / mat), tolerance = 1e-5)
+ expect_equal(-5/x, as.array(-5/mat), tolerance = 1e-05)
expect_equal(x + x, as.array(mat + mat))
- expect_equal(x / x, as.array(mat / mat))
+ expect_equal(x/x, as.array(mat/mat))
expect_equal(x * x, as.array(mat * mat))
expect_equal(x - x, as.array(mat - mat))
expect_equal(as.array(1 - mat), as.array(1 - mat))
- x <- runif(10,-10, 10)
- nd = mx.nd.array(as.array(x))
- expect_equal(sqrt(abs(x)), as.array(mx.nd.sqrt(mx.nd.abs(nd))), tolerance =
1e-6)
- expect_equal(x ^ 2, as.array(mx.nd.square(nd)), tolerance = 1e-6)
+ x <- runif(10, -10, 10)
+ nd <- mx.nd.array(as.array(x))
+ expect_equal(sqrt(abs(x)), as.array(mx.nd.sqrt(mx.nd.abs(nd))), tolerance =
1e-06)
+ expect_equal(x^2, as.array(mx.nd.square(nd)), tolerance = 1e-06)
})
test_that("element-wise calculation for matrix", {
- x = matrix(1:4, 2, 2)
- mat = mx.nd.array(as.array(x), mx.ctx.default())
+ x <- matrix(1:4, 2, 2)
+ mat <- mx.nd.array(as.array(x), mx.ctx.default())
expect_equal(x, as.array(mat))
expect_equal(x + 1, as.array(mat + 1))
expect_equal(x - 10, as.array(mat - 10))
expect_equal(x * 20, as.array(mat * 20))
- expect_equal(x / 3, as.array(mat / 3), tolerance = 1e-5)
+ expect_equal(x/3, as.array(mat/3), tolerance = 1e-05)
expect_equal(-1 - x, as.array(-1 - mat))
- expect_equal(-5 / x, as.array(-5 / mat), tolerance = 1e-5)
+ expect_equal(-5/x, as.array(-5/mat), tolerance = 1e-05)
expect_equal(x + x, as.array(mat + mat))
- expect_equal(x / x, as.array(mat / mat))
+ expect_equal(x/x, as.array(mat/mat))
expect_equal(x * x, as.array(mat * mat))
expect_equal(x - x, as.array(mat - mat))
expect_equal(as.array(1 - mat), as.array(1 - mat))
@@ -51,20 +52,24 @@ test_that("ndarray ones, zeros, save and load", {
expect_equal(matrix(0, 10, 5), as.array(mx.nd.zeros(c(10, 5))))
expect_equal(rep(1, 10), as.array(mx.nd.ones(10)))
expect_equal(matrix(1, 10, 5), as.array(mx.nd.ones(c(10, 5))))
- mat = mx.nd.array(1:20)
- mx.nd.save(mat, 'temp.mat')
- mat2 = mx.nd.load('temp.mat')
+ mat <- mx.nd.array(1:20)
+ mx.nd.save(mat, "temp.mat")
+ mat2 <- mx.nd.load("temp.mat")
expect_true(is.mx.ndarray(mat2[[1]]))
expect_equal(as.array(mat), as.array(mat2[[1]]))
- file.remove('temp.mat')
+ file.remove("temp.mat")
})
test_that("ndarray concatenate", {
shapes <- matrix(c(2, 3, 4, 2, 2, 2, 4, 2, 2, 1, 4, 2), nrow = 3, byrow =
TRUE)
- array_r <- apply(shapes, 2, function(s) { runif(s, -10, 10) })
- array_nd <- apply(array_r, 1, function(s) { mx.nd.array(matrix(s, nrow = 1))
})
+ array_r <- apply(shapes, 2, function(s) {
+ runif(s, -10, 10)
+ })
+ array_nd <- apply(array_r, 1, function(s) {
+ mx.nd.array(matrix(s, nrow = 1))
+ })
array_nd_concat <- mx.nd.concat(data = array_nd, num_args = 3, dim = 1)
- expect_equal(array_r, as.matrix(array_nd_concat), tolerance = 1e-6)
+ expect_equal(array_r, as.matrix(array_nd_concat), tolerance = 1e-06)
x1 <- mx.nd.array(c(1:24))
x2 <- mx.nd.array(c(25:48))
@@ -74,7 +79,8 @@ test_that("ndarray concatenate", {
x1 <- array(1:24, dim = c(4, 3, 2))
x2 <- array(25:48, dim = c(4, 3, 2))
- x3 <- c(1:4, 25:28, 5:8, 29:32, 9:12, 33:36, 13:16, 37:40, 17:20, 41:44,
21:24, 45:48)
+ x3 <- c(1:4, 25:28, 5:8, 29:32, 9:12, 33:36, 13:16, 37:40, 17:20, 41:44,
21:24,
+ 45:48)
y1 <- mx.nd.array(x1)
y2 <- mx.nd.array(x2)
y3 <- mx.nd.concat(data = c(y1, y2), num_args = 2, dim = 2)
@@ -83,8 +89,8 @@ test_that("ndarray concatenate", {
})
test_that("ndarray clip", {
- nd <- mx.nd.array(runif(10,-10, 10))
- nd2 <- mx.nd.clip(nd,-2, 3)
+ nd <- mx.nd.array(runif(10, -10, 10))
+ nd2 <- mx.nd.clip(nd, -2, 3)
arr <- as.array(nd2)
expect_equal(arr >= -2 | arr <= 3, rep(TRUE, length(arr)))
})
@@ -98,7 +104,7 @@ test_that("ndarray dot", {
B <- mx.nd.array(t(b))
C <- mx.nd.dot(A, B)
- expect_equal(c, t(as.matrix(C)), tolerance = 1e-6)
+ expect_equal(c, t(as.matrix(C)), tolerance = 1e-06)
})
test_that("ndarray crop", {
@@ -107,9 +113,10 @@ test_that("ndarray crop", {
expect_equal(array(1, dim = c(2, 1, 3)), as.array(y))
z <- mx.nd.zeros(c(2, 1, 3))
- x <- mxnet:::mx.nd.internal.crop.assign(x, z, begin = c(0, 0, 0), end = c(2,
1, 3))
+ x <- mxnet:::mx.nd.internal.crop.assign(x, z, begin = c(0, 0, 0), end = c(2,
+ 1, 3))
arr_x <- array(1, dim = dim(x))
- arr_x[c(1:2), 1 , c(1:3)] <- 0
+ arr_x[c(1:2), 1, c(1:3)] <- 0
expect_equal(as.array(x), arr_x)
})
@@ -118,77 +125,77 @@ test_that("ndarray negate", {
arr <- array(runif(24, -10, 10), dim = c(2, 3, 4))
nd <- mx.nd.array(arr)
- expect_equal(arr, as.array(nd), tolerance = 1e-6)
- expect_equal(-arr, as.array(-nd), tolerance = 1e-6)
- expect_equal(arr, as.array(nd), tolerance = 1e-6)
+ expect_equal(arr, as.array(nd), tolerance = 1e-06)
+ expect_equal(-arr, as.array(-nd), tolerance = 1e-06)
+ expect_equal(arr, as.array(nd), tolerance = 1e-06)
})
test_that("ndarray equal", {
x <- mx.nd.zeros(c(2, 3))
y <- mx.nd.ones(c(2, 3))
- z = x == y
- expect_equal(as.array(z), array(0, c(2,3)))
+ z <- x == y
+ expect_equal(as.array(z), array(0, c(2, 3)))
- z = 0 == x
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- 0 == x
+ expect_equal(as.array(z), array(1, c(2, 3)))
})
test_that("ndarray not equal", {
x <- mx.nd.zeros(c(2, 3))
y <- mx.nd.ones(c(2, 3))
- z = x != y
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- x != y
+ expect_equal(as.array(z), array(1, c(2, 3)))
- z = 0 != x
- expect_equal(as.array(z), array(0, c(2,3)))
+ z <- 0 != x
+ expect_equal(as.array(z), array(0, c(2, 3)))
})
test_that("ndarray greater", {
x <- mx.nd.zeros(c(2, 3))
y <- mx.nd.ones(c(2, 3))
- z = x > y
- expect_equal(as.array(z), array(0, c(2,3)))
+ z <- x > y
+ expect_equal(as.array(z), array(0, c(2, 3)))
- z = y > 0
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- y > 0
+ expect_equal(as.array(z), array(1, c(2, 3)))
- z = 0 > y
- expect_equal(as.array(z), array(0, c(2,3)))
+ z <- 0 > y
+ expect_equal(as.array(z), array(0, c(2, 3)))
- z = x >= y
- expect_equal(as.array(z), array(0, c(2,3)))
+ z <- x >= y
+ expect_equal(as.array(z), array(0, c(2, 3)))
- z = y >= 0
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- y >= 0
+ expect_equal(as.array(z), array(1, c(2, 3)))
- z = 0 >= y
- expect_equal(as.array(z), array(0, c(2,3)))
+ z <- 0 >= y
+ expect_equal(as.array(z), array(0, c(2, 3)))
- z = y >= 1
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- y >= 1
+ expect_equal(as.array(z), array(1, c(2, 3)))
})
test_that("ndarray lesser", {
x <- mx.nd.zeros(c(2, 3))
y <- mx.nd.ones(c(2, 3))
- z = x < y
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- x < y
+ expect_equal(as.array(z), array(1, c(2, 3)))
- z = y < 0
- expect_equal(as.array(z), array(0, c(2,3)))
+ z <- y < 0
+ expect_equal(as.array(z), array(0, c(2, 3)))
- z = 0 < y
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- 0 < y
+ expect_equal(as.array(z), array(1, c(2, 3)))
- z = x <= y
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- x <= y
+ expect_equal(as.array(z), array(1, c(2, 3)))
- z = y <= 0
- expect_equal(as.array(z), array(0, c(2,3)))
+ z <- y <= 0
+ expect_equal(as.array(z), array(0, c(2, 3)))
- z = 0 <= y
- expect_equal(as.array(z), array(1, c(2,3)))
+ z <- 0 <= y
+ expect_equal(as.array(z), array(1, c(2, 3)))
- z = y <= 1
- expect_equal(as.array(z), array(1, c(2,3)))
-})
\ No newline at end of file
+ z <- y <= 1
+ expect_equal(as.array(z), array(1, c(2, 3)))
+})
diff --git a/R-package/tests/testthat/test_optimizer.R
b/R-package/tests/testthat/test_optimizer.R
index c6dacaa728b..a02a9edf524 100644
--- a/R-package/tests/testthat/test_optimizer.R
+++ b/R-package/tests/testthat/test_optimizer.R
@@ -1,204 +1,168 @@
context("optimizer")
test_that("sgd", {
-
- data = mx.symbol.Variable('data')
- label = mx.symbol.Variable('label')
- fc_weight = mx.symbol.Variable('fc_weight')
- fc = mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
name = 'fc1', num_hidden = 1)
- loss = mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
'loss')
-
- x <- mx.nd.array(array(1:6, dim=2:3))
+
+ data <- mx.symbol.Variable("data")
+ label <- mx.symbol.Variable("label")
+ fc_weight <- mx.symbol.Variable("fc_weight")
+ fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
+ name = "fc1", num_hidden = 1)
+ loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
"loss")
+
+ x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
- w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2,1)))
-
- exec <- mxnet:::mx.symbol.bind(symbol = loss,
- ctx = mx.cpu(),
- arg.arrays = list(data = x,
- fc1_weight = w1,
- label = y),
- aux.arrays = NULL,
- grad.reqs = c("null", "write", "null"))
-
- optimizer <- mx.opt.create("sgd",
- learning.rate = 1,
- momentum = 0,
- wd = 0,
- rescale.grad = 1,
- clip_gradient = -1)
-
+ w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
+
+ exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.cpu(), arg.arrays =
list(data = x,
+ fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null",
"write",
+ "null"))
+
+ optimizer <- mx.opt.create("sgd", learning.rate = 1, momentum = 0, wd = 0,
rescale.grad = 1,
+ clip_gradient = -1)
+
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx =
mx.cpu())
-
+
mx.exec.forward(exec, is.train = T)
mx.exec.backward(exec)
-
+
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
-
- expect_equal(as.array(arg.blocks[[2]]), array(c(1.4, 2.6), dim = c(2,1)),
tolerance = 1e-1)
-
+
+ expect_equal(as.array(arg.blocks[[2]]), array(c(1.4, 2.6), dim = c(2, 1)),
tolerance = 0.1)
+
})
test_that("rmsprop", {
-
- data = mx.symbol.Variable('data')
- label = mx.symbol.Variable('label')
- fc_weight = mx.symbol.Variable('fc_weight')
- fc = mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
name = 'fc1', num_hidden = 1)
- loss = mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
'loss')
-
- x <- mx.nd.array(array(1:6, dim=2:3))
+
+ data <- mx.symbol.Variable("data")
+ label <- mx.symbol.Variable("label")
+ fc_weight <- mx.symbol.Variable("fc_weight")
+ fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
+ name = "fc1", num_hidden = 1)
+ loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
"loss")
+
+ x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
- w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2,1)))
-
- exec <- mxnet:::mx.symbol.bind(symbol = loss,
- ctx = mx.cpu(),
- arg.arrays = list(data = x,
- fc1_weight = w1,
- label = y),
- aux.arrays = NULL,
- grad.reqs = c("null", "write", "null"))
-
- optimizer <- mx.opt.create("rmsprop", learning.rate = 1,
- centered = TRUE,
- gamma1 = 0.95,
- gamma2 = 0.9,
- epsilon = 1e-4,
- wd = 0,
- rescale.grad = 1,
- clip_gradient = -1)
-
+ w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
+
+ exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.cpu(), arg.arrays =
list(data = x,
+ fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null",
"write",
+ "null"))
+
+ optimizer <- mx.opt.create("rmsprop", learning.rate = 1, centered = TRUE,
gamma1 = 0.95,
+ gamma2 = 0.9, epsilon = 1e-04, wd = 0, rescale.grad = 1, clip_gradient =
-1)
+
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx =
mx.cpu())
-
+
mx.exec.forward(exec, is.train = T)
mx.exec.backward(exec)
-
+
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
-
- expect_equal(as.array(arg.blocks[[2]]), array(c(5.64, 6.38), dim = c(2,1)),
tolerance = 1e-1)
-
+
+ expect_equal(as.array(arg.blocks[[2]]), array(c(5.64, 6.38), dim = c(2, 1)),
+ tolerance = 0.1)
+
})
test_that("adam", {
-
- data = mx.symbol.Variable('data')
- label = mx.symbol.Variable('label')
- fc_weight = mx.symbol.Variable('fc_weight')
- fc = mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
name = 'fc1', num_hidden = 1)
- loss = mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
'loss')
-
- x <- mx.nd.array(array(1:6, dim=2:3))
+
+ data <- mx.symbol.Variable("data")
+ label <- mx.symbol.Variable("label")
+ fc_weight <- mx.symbol.Variable("fc_weight")
+ fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
+ name = "fc1", num_hidden = 1)
+ loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
"loss")
+
+ x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
- w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2,1)))
-
- exec <- mxnet:::mx.symbol.bind(symbol = loss,
- ctx = mx.cpu(),
- arg.arrays = list(data = x,
- fc1_weight = w1,
- label = y),
- aux.arrays = NULL,
- grad.reqs = c("null", "write", "null"))
-
- optimizer <- mx.opt.create("adam",
- learning.rate = 1,
- beta1 = 0.9,
- beta2 = 0.999,
- epsilon = 1e-8,
- wd = 0,
- rescale.grad = 1,
- clip_gradient = -1)
-
+ w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
+
+ exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.cpu(), arg.arrays =
list(data = x,
+ fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null",
"write",
+ "null"))
+
+ optimizer <- mx.opt.create("adam", learning.rate = 1, beta1 = 0.9, beta2 =
0.999,
+ epsilon = 1e-08, wd = 0, rescale.grad = 1, clip_gradient = -1)
+
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx =
mx.cpu())
-
+
mx.exec.forward(exec, is.train = T)
mx.exec.backward(exec)
-
+
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
-
- expect_equal(as.array(arg.blocks[[2]]), array(c(4.26, 4.96), dim = c(2,1)),
tolerance = 1e-1)
-
+
+ expect_equal(as.array(arg.blocks[[2]]), array(c(4.26, 4.96), dim = c(2, 1)),
+ tolerance = 0.1)
+
})
test_that("adagrad", {
-
- data = mx.symbol.Variable('data')
- label = mx.symbol.Variable('label')
- fc_weight = mx.symbol.Variable('fc_weight')
- fc = mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
name = 'fc1', num_hidden = 1)
- loss = mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
'loss')
-
- x <- mx.nd.array(array(1:6, dim=2:3))
+
+ data <- mx.symbol.Variable("data")
+ label <- mx.symbol.Variable("label")
+ fc_weight <- mx.symbol.Variable("fc_weight")
+ fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
+ name = "fc1", num_hidden = 1)
+ loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
"loss")
+
+ x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
- w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2,1)))
-
- exec <- mxnet:::mx.symbol.bind(symbol = loss,
- ctx = mx.cpu(),
- arg.arrays = list(data = x,
- fc1_weight = w1,
- label = y),
- aux.arrays = NULL,
- grad.reqs = c("null", "write", "null"))
-
- optimizer <- mx.opt.create("adagrad",
- learning.rate = 1,
- epsilon = 1e-8,
- wd = 0,
- rescale.grad = 1,
- clip_gradient = -1)
-
+ w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
+
+ exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.cpu(), arg.arrays =
list(data = x,
+ fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null",
"write",
+ "null"))
+
+ optimizer <- mx.opt.create("adagrad", learning.rate = 1, epsilon = 1e-08, wd
= 0,
+ rescale.grad = 1, clip_gradient = -1)
+
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx =
mx.cpu())
-
+
mx.exec.forward(exec, is.train = T)
mx.exec.backward(exec)
-
+
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
-
- expect_equal(as.array(arg.blocks[[2]]), array(c(2.1, 2.8), dim = c(2,1)),
tolerance = 1e-1)
-
+
+ expect_equal(as.array(arg.blocks[[2]]), array(c(2.1, 2.8), dim = c(2, 1)),
tolerance = 0.1)
+
})
test_that("adadelta", {
-
- data = mx.symbol.Variable('data')
- label = mx.symbol.Variable('label')
- fc_weight = mx.symbol.Variable('fc_weight')
- fc = mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
name = 'fc1', num_hidden = 1)
- loss = mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
'loss')
-
- x <- mx.nd.array(array(1:6, dim=2:3))
+
+ data <- mx.symbol.Variable("data")
+ label <- mx.symbol.Variable("label")
+ fc_weight <- mx.symbol.Variable("fc_weight")
+ fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T,
+ name = "fc1", num_hidden = 1)
+ loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name =
"loss")
+
+ x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
- w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2,1)))
-
- exec <- mxnet:::mx.symbol.bind(symbol = loss,
- ctx = mx.cpu(),
- arg.arrays = list(data = x,
- fc1_weight = w1,
- label = y),
- aux.arrays = NULL,
- grad.reqs = c("null", "write", "null"))
-
- optimizer <- mx.opt.create("adadelta",
- rho = 0.90,
- epsilon = 1e-5,
- wd = 0,
- rescale.grad = 1,
- clip_gradient = -1)
-
+ w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
+
+ exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.cpu(), arg.arrays =
list(data = x,
+ fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null",
"write",
+ "null"))
+
+ optimizer <- mx.opt.create("adadelta", rho = 0.9, epsilon = 1e-05, wd = 0,
rescale.grad = 1,
+ clip_gradient = -1)
+
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx =
mx.cpu())
-
+
mx.exec.forward(exec, is.train = T)
mx.exec.backward(exec)
-
+
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
-
- expect_equal(as.array(arg.blocks[[2]]), array(c(1.11, 1.81), dim = c(2,1)),
tolerance = 1e-1)
-
+
+ expect_equal(as.array(arg.blocks[[2]]), array(c(1.11, 1.81), dim = c(2, 1)),
+ tolerance = 0.1)
+
})
diff --git a/R-package/tests/testthat/test_random.R
b/R-package/tests/testthat/test_random.R
index 411d0c768a6..e90011dadb2 100644
--- a/R-package/tests/testthat/test_random.R
+++ b/R-package/tests/testthat/test_random.R
@@ -3,17 +3,17 @@ require(mxnet)
context("random")
test_that("mx.runif", {
- X <- mx.runif(shape=50000, min=0, max=1, ctx=mx.ctx.default())
- expect_equal(X>=0, mx.nd.ones(50000))
- expect_equal(X<=1, mx.nd.ones(50000))
- sample_mean = mean(as.array(X))
- expect_equal(sample_mean, 0.5, tolerance=1e-2)
+ X <- mx.runif(shape = 50000, min = 0, max = 1, ctx = mx.ctx.default())
+ expect_equal(X >= 0, mx.nd.ones(50000))
+ expect_equal(X <= 1, mx.nd.ones(50000))
+ sample_mean <- mean(as.array(X))
+ expect_equal(sample_mean, 0.5, tolerance = 0.01)
})
test_that("mx.rnorm", {
- X <- mx.rnorm(shape=50000, mean=5, sd=0.1, ctx=mx.ctx.default())
- sample_mean = mean(as.array(X))
- sample_sd = sd(as.array(X))
- expect_equal(sample_mean, 5, tolerance=1e-2)
- expect_equal(sample_sd, 0.1, tolerance=1e-2)
+ X <- mx.rnorm(shape = 50000, mean = 5, sd = 0.1, ctx = mx.ctx.default())
+ sample_mean <- mean(as.array(X))
+ sample_sd <- sd(as.array(X))
+ expect_equal(sample_mean, 5, tolerance = 0.01)
+ expect_equal(sample_sd, 0.1, tolerance = 0.01)
})
diff --git a/R-package/tests/testthat/test_symbol.R
b/R-package/tests/testthat/test_symbol.R
index 656d146cd87..4a253fbd3e7 100644
--- a/R-package/tests/testthat/test_symbol.R
+++ b/R-package/tests/testthat/test_symbol.R
@@ -3,71 +3,73 @@ require(mxnet)
context("symbol")
test_that("basic symbol operation", {
- data = mx.symbol.Variable('data')
- net1 = mx.symbol.FullyConnected(data = data, name = 'fc1', num_hidden = 10)
- net1 = mx.symbol.FullyConnected(data = net1, name = 'fc2', num_hidden = 100)
+ data <- mx.symbol.Variable("data")
+ net1 <- mx.symbol.FullyConnected(data = data, name = "fc1", num_hidden = 10)
+ net1 <- mx.symbol.FullyConnected(data = net1, name = "fc2", num_hidden = 100)
- expect_equal(arguments(net1), c('data', 'fc1_weight', 'fc1_bias',
'fc2_weight', 'fc2_bias'))
- expect_equal(outputs(net1), 'fc2_output')
+ expect_equal(arguments(net1), c("data", "fc1_weight", "fc1_bias",
"fc2_weight",
+ "fc2_bias"))
+ expect_equal(outputs(net1), "fc2_output")
- net2 = mx.symbol.FullyConnected(name = 'fc3', num_hidden = 10)
- net2 = mx.symbol.Activation(data = net2, act_type = 'relu')
- net2 = mx.symbol.FullyConnected(data = net2, name = 'fc4', num_hidden = 20)
+ net2 <- mx.symbol.FullyConnected(name = "fc3", num_hidden = 10)
+ net2 <- mx.symbol.Activation(data = net2, act_type = "relu")
+ net2 <- mx.symbol.FullyConnected(data = net2, name = "fc4", num_hidden = 20)
- composed = mx.apply(net2, fc3_data = net1, name = 'composed')
+ composed <- mx.apply(net2, fc3_data = net1, name = "composed")
- expect_equal(arguments(composed), c('data', 'fc1_weight', 'fc1_bias',
'fc2_weight', 'fc2_bias', 'fc3_weight', 'fc3_bias', 'fc4_weight', 'fc4_bias'))
- expect_equal(outputs(composed), 'composed_output')
+ expect_equal(arguments(composed), c("data", "fc1_weight", "fc1_bias",
"fc2_weight",
+ "fc2_bias", "fc3_weight", "fc3_bias", "fc4_weight", "fc4_bias"))
+ expect_equal(outputs(composed), "composed_output")
- multi_out = mx.symbol.Group(c(composed, net1))
- expect_equal(outputs(multi_out), c('composed_output', 'fc2_output'))
+ multi_out <- mx.symbol.Group(c(composed, net1))
+ expect_equal(outputs(multi_out), c("composed_output", "fc2_output"))
})
test_that("symbol internal", {
- data = mx.symbol.Variable('data')
- oldfc = mx.symbol.FullyConnected(data = data, name = 'fc1', num_hidden = 10)
- net1 = mx.symbol.FullyConnected(data = oldfc, name = 'fc2', num_hidden = 100)
+ data <- mx.symbol.Variable("data")
+ oldfc <- mx.symbol.FullyConnected(data = data, name = "fc1", num_hidden = 10)
+ net1 <- mx.symbol.FullyConnected(data = oldfc, name = "fc2", num_hidden =
100)
- expect_equal(arguments(net1), c("data", "fc1_weight", "fc1_bias",
"fc2_weight", "fc2_bias"))
+ expect_equal(arguments(net1), c("data", "fc1_weight", "fc1_bias",
"fc2_weight",
+ "fc2_bias"))
- internal = internals(net1)
- fc1 = internal[[match("fc1_output", internal$outputs)]]
+ internal <- internals(net1)
+ fc1 <- internal[[match("fc1_output", internal$outputs)]]
expect_equal(arguments(fc1), arguments(oldfc))
})
test_that("symbol children", {
- data = mx.symbol.Variable('data')
- oldfc = mx.symbol.FullyConnected(data = data,
- name = 'fc1',
- num_hidden = 10)
- net1 = mx.symbol.FullyConnected(data = oldfc, name = 'fc2', num_hidden = 100)
+ data <- mx.symbol.Variable("data")
+ oldfc <- mx.symbol.FullyConnected(data = data, name = "fc1", num_hidden = 10)
+ net1 <- mx.symbol.FullyConnected(data = oldfc, name = "fc2", num_hidden =
100)
- expect_equal(outputs(children(net1)), c('fc1_output', 'fc2_weight',
'fc2_bias'))
- expect_equal(outputs(children(children(net1))), c('data', 'fc1_weight',
'fc1_bias'))
+ expect_equal(outputs(children(net1)), c("fc1_output", "fc2_weight",
"fc2_bias"))
+ expect_equal(outputs(children(children(net1))), c("data", "fc1_weight",
"fc1_bias"))
- net2 = net1$get.children()
- expect_equal(net2[[match('fc2_weight', net2$outputs)]]$arguments,
'fc2_weight')
+ net2 <- net1$get.children()
+ expect_equal(net2[[match("fc2_weight", net2$outputs)]]$arguments,
"fc2_weight")
- data = mx.symbol.Variable('data')
- sliced = mx.symbol.SliceChannel(data, num_outputs = 3, name = 'slice')
- expect_equal(outputs(children(sliced)), 'data')
+ data <- mx.symbol.Variable("data")
+ sliced <- mx.symbol.SliceChannel(data, num_outputs = 3, name = "slice")
+ expect_equal(outputs(children(sliced)), "data")
})
test_that("symbol infer type", {
- num_hidden = 128
- num_dim = 64
- num_sample = 10
+ num_hidden <- 128
+ num_dim <- 64
+ num_sample <- 10
- data = mx.symbol.Variable('data')
- prev = mx.symbol.Variable('prevstate')
- x2h = mx.symbol.FullyConnected(data = data, name = 'x2h', num_hidden =
num_hidden)
- h2h = mx.symbol.FullyConnected(data = prev, name = 'h2h', num_hidden =
num_hidden)
+ data <- mx.symbol.Variable("data")
+ prev <- mx.symbol.Variable("prevstate")
+ x2h <- mx.symbol.FullyConnected(data = data, name = "x2h", num_hidden =
num_hidden)
+ h2h <- mx.symbol.FullyConnected(data = prev, name = "h2h", num_hidden =
num_hidden)
- out = mx.symbol.Activation(data = mx.symbol.elemwise_add(x2h, h2h), name =
'out', act_type = 'relu')
+ out <- mx.symbol.Activation(data = mx.symbol.elemwise_add(x2h, h2h), name =
"out",
+ act_type = "relu")
# shape inference will fail because information is not available for h2h
- ret = mx.symbol.infer.shape(out, data = c(num_dim, num_sample))
+ ret <- mx.symbol.infer.shape(out, data = c(num_dim, num_sample))
expect_equal(ret, NULL)
})
@@ -77,7 +79,7 @@ test_that("symbol save/load", {
fc1 <- mx.symbol.FullyConnected(data, num_hidden = 1)
lro <- mx.symbol.LinearRegressionOutput(fc1)
mx.symbol.save(lro, "tmp_r_sym.json")
- data2 = mx.symbol.load("tmp_r_sym.json")
+ data2 <- mx.symbol.load("tmp_r_sym.json")
expect_equal(data2$as.json(), lro$as.json())
file.remove("tmp_r_sym.json")
@@ -85,12 +87,12 @@ test_that("symbol save/load", {
test_that("symbol attributes access", {
str <- "(1, 1, 1, 1)"
- x = mx.symbol.Variable('x')
+ x <- mx.symbol.Variable("x")
x$attributes <- list(`__shape__` = str)
expect_equal(x$attributes$`__shape__`, str)
- y = mx.symbol.Variable('y')
+ y <- mx.symbol.Variable("y")
y$attributes$`__shape__` <- str
expect_equal(y$attributes$`__shape__`, str)
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services