Baunsgaard commented on a change in pull request #1324:
URL: https://github.com/apache/systemds/pull/1324#discussion_r667807010



##########
File path: src/test/scripts/applications/GAN/GAN_cnn.dml
##########
@@ -0,0 +1,510 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("nn/layers/affine.dml") as affine
+source("nn/layers/leaky_relu.dml") as leaky_relu
+source("nn/layers/conv2d_builtin.dml") as conv2d
+source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
+source("nn/layers/log_loss.dml") as log_loss
+source("nn/layers/dropout.dml") as dropout
+source("nn/layers/batch_norm1d.dml") as batch_norm_1d
+source("nn/layers/batch_norm2d.dml") as batch_norm_2d
+source("nn/layers/softmax.dml") as softmax
+source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/tanh.dml") as tanh
+source("nn/optim/adam.dml") as adam
+
+train = function(matrix[double] X, int iterations)
+    return (matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, 
matrix[double] Gb_2, matrix[double] GW_3,
+            matrix[double] Gb_3, matrix[double] GW_4, matrix[double] Gb_4, 
matrix[double] DW_1, matrix[double] Db_1,
+            matrix[double] DW_2, matrix[double] Db_2, matrix[double] DW_3, 
matrix[double] Db_3)
+{
+/*
+   * Trains the generator and the discriminator of the GAN.
+   *
+   * The input matrix, X, has N examples, each with 784 features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, 784).
+   *  - iterations: number of iterations for training
+   *
+   * Outputs:
+   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 
D).
+   *  - Gb_1: Generator 1st layer biases vector, of shape (1, D).
+   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (256, 
128*HWf*HWf).
+   *  - Gb_2: Generator 2nd layer biases vector, of shape (128, 1).
+   *  - GW_3: Generator 3rd layer weights (parameters) matrix, of shape (128, 
64*HWf*HWf).
+   *  - Gb_3: Generator 3rd layer biases vector, of shape (64, 1).
+   *  - GW_4: Generator 4th layer weights (parameters) matrix, of shape (64, 
1*HWf*HWf).
+   *  - Gb_4: Generator 4th layer biases vector, of shape (1, 1).
+   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape 
(64, 1*HWf*HWf).
+   *  - Db_1: Discriminator 1st layer biases vector, of shape (64, 1).
+   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape 
(128, 64*HWf*HWf).
+   *  - Db_2: Discriminator 2nd layer biases vector, of shape (128, 1).
+   *  - DW_3: Discriminator 3rd layer weights (parameters) matrix, of shape 
(6272, 1).
+   *  - Db_3: Discriminator 3rd layer biases vector, of shape (1, 1).
+*/
+    N = nrow(X)
+    batch_size = 128
+    half_batch = batch_size / 2
+    D = 7*7*256
+    HWf = 5
+
+    #Define Generator:
+    [GW_1, Gb_1] = affine::init(100, D, -1)
+    [GW_2, Gb_2] = conv2d_transpose::init(128, 256, HWf, HWf)
+    [GW_3, Gb_3] = conv2d_transpose::init(64, 128, HWf, HWf)
+    [GW_4, Gb_4] = conv2d_transpose::init(1, 64, HWf, HWf)
+    [mGW_1, vGW_1] = adam::init(GW_1)
+    [mGb_1, vGb_1] = adam::init(Gb_1)
+    [mGW_2, vGW_2] = adam::init(GW_2)
+    [mGb_2, vGb_2] = adam::init(Gb_2)
+    [mGW_3, vGW_3] = adam::init(GW_3)
+    [mGb_3, vGb_3] = adam::init(Gb_3)
+    [mGW_4, vGW_4] = adam::init(GW_4)
+    [mGb_4, vGb_4] = adam::init(Gb_4)
+
+    gen_model = list(GW_1, Gb_1, GW_2, Gb_2, GW_3, Gb_3, GW_4, Gb_4)
+    gen_grad = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2, 
mGW_3, vGW_3, mGb_3, vGb_3, mGW_4, vGW_4, mGb_4, vGb_4)
+
+    #Define Discriminator:
+    [DW_1, Db_1] = conv2d::init(64, 1, HWf, HWf, -1)
+    [DW_2, Db_2] = conv2d::init(128, 64, HWf, HWf, -1)
+    [DW_3, Db_3] = affine::init(6272, 1, -1)
+    [mDW_1, vDW_1] = adam::init(DW_1)
+    [mDb_1, vDb_1] = adam::init(Db_1)
+    [mDW_2, vDW_2] = adam::init(DW_2)
+    [mDb_2, vDb_2] = adam::init(Db_2)
+    [mDW_3, vDW_3] = adam::init(DW_3)
+    [mDb_3, vDb_3] = adam::init(Db_3)
+
+    disc_model = list(DW_1, Db_1, DW_2, Db_2, DW_3, Db_3)
+    disc_grad = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2, 
mDW_3, vDW_3, mDb_3, vDb_3)
+
+    fake = matrix(0, 0, 784)
+
+    for(i in 1:iterations)
+    {
+        print('step ' + toString(i) + ' / ' + toString(iterations))
+        #generate samples
+        noise = rand(rows = half_batch, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model, 'train')
+        rand = sample(N, half_batch)
+        real_images = matrix(0, half_batch, 784)
+        for(r in 1:half_batch)
+        {
+            real_images[r,] = X[as.scalar(rand[r]),]
+        }
+
+        #train discriminator
+        [decision, disc_params] = disc_forward(real_images, disc_model)
+        targets = matrix(1, half_batch, 1)
+        dloss1 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, 
i, disc_model, disc_grad, disc_params)
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(0, half_batch, 1)
+        dloss2 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, 
i, disc_model, disc_grad, disc_params)
+        print('discriminator_loss: ' + toString((dloss1 + dloss2)))
+
+        #generate samples
+        noise = rand(rows = batch_size, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model, 'train')
+
+        #train generator
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(1, batch_size, 1)
+        gloss = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, 
i, disc_model, disc_grad, disc_params)
+        [gen_model, gen_grad] = gen_backward(dX, i, gen_model, gen_grad, 
gen_params, 'train')
+        print('generator_loss: ' + toString(gloss))
+
+        # get sample generated image to observe evolution of generated images
+        if(i %% (iterations/10) == 0)
+        {
+            fake = rbind(fake, fake_images[1])
+        }
+    }
+    out_dir = "target/testTemp/applications/GAN/GANTest/"
+    fake = 0.5 * fake + 0.5
+    write(fake, out_dir+"/evo")
+    DW_1 = as.matrix(disc_model[1])
+    Db_1 = as.matrix(disc_model[2])
+    DW_2 = as.matrix(disc_model[3])
+    Db_2 = as.matrix(disc_model[4])
+    DW_3 = as.matrix(disc_model[5])
+    Db_3 = as.matrix(disc_model[6])
+    GW_1 = as.matrix(gen_model[1])
+    Gb_1 = as.matrix(gen_model[2])
+    GW_2 = as.matrix(gen_model[3])
+    Gb_2 = as.matrix(gen_model[4])
+    GW_3 = as.matrix(gen_model[5])
+    Gb_3 = as.matrix(gen_model[6])
+    GW_4 = as.matrix(gen_model[7])
+    Gb_4 = as.matrix(gen_model[8])
+}
+
+gen_forward = function(matrix[double] noise, list[unknown] model, String mode)
+    return(matrix[double] images, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the generator.
+   * Generates fake images from input noise.
+   *
+   * Inputs:
+   *  - noise: Randomly generated noise, of shape (N, 100).
+   *  - model: List containing the generator weights and biases.
+   *  - mode: 'train' or 'test' for batch normalization layers.
+   *
+   * Outputs:
+   *  - images: Generated images, of shape (N, 784).
+   *  - params: List of outputs of the generator layers, needed for backward 
pass.
+*/
+    D = 7*7*256
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    GW_1 = as.matrix(model[1])
+    Gb_1 = as.matrix(model[2])
+    GW_2 = as.matrix(model[3])
+    Gb_2 = as.matrix(model[4])
+    GW_3 = as.matrix(model[5])
+    Gb_3 = as.matrix(model[6])
+    GW_4 = as.matrix(model[7])
+    Gb_4 = as.matrix(model[8])
+
+    #Generator forward:
+    #Layer 1
+    out_1G = affine::forward(noise, GW_1, Gb_1)
+    [out_1G_batch_norm, ema_mean_upd_1, ema_var_upd_1, cache_mean_1, 
cache_var_1, cache_norm_1] = batch_norm_1d::forward(out_1G,
+                                                matrix(1,1,D), matrix(0,1,D), 
mode, matrix(0,1,D), matrix(1,1,D), 0.99, 0.001)
+    out_1G_leaky_relu = leaky_relu::forward(out_1G_batch_norm)
+    #Layer 2
+    [out_2G, hout_2G, wout_2G] = conv2d_transpose::forward(out_1G_leaky_relu, 
GW_2, Gb_2, 256, 7, 7, HWf, HWf, 1, 1,
+                                                                   pad, pad, 
0, 0)
+    [out_2G_batch_norm, ema_mean_upd_2, ema_var_upd_2, cache_mean_2, 
cache_inv_var_2] = batch_norm_2d::forward(out_2G,
+                matrix(1,128,1), matrix(0,128,1), 128, hout_2G, wout_2G, mode, 
matrix(0,128,1), matrix(1,128,1), 0.99, 0.001)
+    out_2G_leaky_relu = leaky_relu::forward(out_2G_batch_norm)
+
+    #Layer 3
+    [out_3G, hout_3G, wout_3G] = conv2d_transpose::forward(out_2G_leaky_relu, 
GW_3, Gb_3, 128, hout_2G, wout_2G, HWf,
+                                                                   HWf, 
stride, stride, pad, pad, 1, 1)
+    [out_3G_batch_norm, ema_mean_upd_3, ema_var_upd_3, cache_mean_3, 
cache_inv_var_3] = batch_norm_2d::forward(out_3G,
+                matrix(1,64,1), matrix(0,64,1), 64, hout_3G, wout_3G, mode, 
matrix(0,64,1), matrix(1,64,1), 0.99, 0.001)
+    out_3G_leaky_relu = leaky_relu::forward(out_3G_batch_norm)
+
+    #Output Layer
+    [out_4G, hout_4G, wout_4G] = conv2d_transpose::forward(out_3G_leaky_relu, 
GW_4, Gb_4, 64, hout_3G, wout_3G, HWf,
+                                                                   HWf, 
stride, stride, pad, pad, 1, 1)
+    out_4G_tanh = tanh::forward(out_4G)
+
+    images = out_4G_tanh
+    params = list(noise, out_1G, out_1G_batch_norm, ema_mean_upd_1, 
ema_var_upd_1, cache_mean_1, cache_var_1,
+                   cache_norm_1, out_1G_leaky_relu, out_2G, hout_2G, wout_2G, 
out_2G_batch_norm, cache_mean_2, cache_inv_var_2,
+                   out_2G_leaky_relu, out_3G, hout_3G, wout_3G, 
out_3G_batch_norm, cache_mean_3, cache_inv_var_3, out_3G_leaky_relu,
+                   out_4G, hout_4G, wout_4G)
+}
+
+disc_forward = function(matrix[double] X, list[unknown] model)
+    return(matrix[double] decision, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the discriminator.
+   * Decides if input images are real or fake.
+   *
+   * Inputs:
+   *  - X: Input matrix containing sample images, of shape (N, 784).
+   *  - model: List containing the discriminator weights and biases.
+   *
+   * Outputs:
+   *  - decision: Decisions for realness of input, of shape (N, 1).
+   *  - params: List of outputs of the discriminator layers, needed for 
backward pass.
+*/
+    HWin = 28
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+    DW_3 = as.matrix(model[5])
+    Db_3 = as.matrix(model[6])
+
+    #Discriminator forward
+    #Layer 1
+    [out_1D, hout_1D, wout_1D] = conv2d::forward(X, DW_1, Db_1, 1, HWin, HWin, 
HWf, HWf, stride, stride, pad, pad)
+    out_1D_leaky_relu = leaky_relu::forward(out_1D)
+    [out_1D_dropout, mask_1] = dropout::forward(out_1D_leaky_relu, 0.3, -1)
+
+    #Layer 2
+    [out_2D, hout_2D, wout_2D] = conv2d::forward(out_1D_dropout, DW_2, Db_2, 
64, hout_1D, wout_1D, HWf, HWf, stride,
+                                                         stride, pad, pad)
+    out_2D_leaky_relu = leaky_relu::forward(out_2D)
+    [out_2D_dropout, mask_2] = dropout::forward(out_2D_leaky_relu, 0.3, -1)
+
+    #Output Layer
+    out_3D = affine::forward(out_2D_dropout, DW_3, Db_3)
+    decision = sigmoid::forward(out_3D)
+    params = list(X, out_1D, hout_1D, wout_1D, out_1D_leaky_relu, 
out_1D_dropout, mask_1, out_2D, hout_2D, wout_2D,
+                  out_2D_leaky_relu, out_2D_dropout, mask_2, out_3D)
+}
+
+disc_backward = function(matrix[double] decision, matrix[double] targets, 
boolean lock, int iteration, list[unknown] model, list[unknown] gradients,
+                         list[unknown] params)
+    return(matrix[double] dX, list[unknown] model, list[unknown] gradients)
+{
+/*
+   * Computes the backward pass of the discriminator.
+   * Updates gradients and weights of the discriminator.
+   *
+   * Inputs:
+   *  - decisions: Input matrix containing discriminator decisions, of shape 
(N, 1).
+   *  - targets: Target values for the decisions, of shape (N, 1).
+   *  - lock: Boolean that governs if discriminator weights are to be updated, 
TRUE means the weights are not updated.
+   *  - iteration: Current iteration of the training.
+   *  - model: List containing the discriminator weights and biases.
+   *  - gradients: List containing the discriminator gradients.
+   *  - params: List of outputs of the discriminator layers from the forward 
pass.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, 784).
+   *  - model: List containing the updated discriminator weights and biases.
+   *  - gradients: List containing the updated discriminator gradients.
+*/
+    HWin = 28
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    lr = 0.0002
+    beta1 = 0.5
+    beta2 = 0.999
+    epsilon = 1e-07
+
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+    DW_3 = as.matrix(model[5])
+    Db_3 = as.matrix(model[6])
+
+    mDW_1 = as.matrix(gradients[1])
+    vDW_1 = as.matrix(gradients[2])
+    mDb_1 = as.matrix(gradients[3])
+    vDb_1 = as.matrix(gradients[4])
+    mDW_2 = as.matrix(gradients[5])
+    vDW_2 = as.matrix(gradients[6])
+    mDb_2 = as.matrix(gradients[7])
+    vDb_2 = as.matrix(gradients[8])
+    mDW_3 = as.matrix(gradients[9])
+    vDW_3 = as.matrix(gradients[10])
+    mDb_3 = as.matrix(gradients[11])
+    vDb_3 = as.matrix(gradients[12])
+
+    #Discriminator backward
+    #Output Layer
+    dloss = log_loss::backward(decision, targets)
+    dout_3D = sigmoid::backward(dloss, as.matrix(params[14]))
+    [dout_2D, dDW_3, dDb_3] = affine::backward(dout_3D, as.matrix(params[12]), 
DW_3, Db_3)
+
+    #Layer 2
+    dout_2D_dropout = dropout::backward(dout_2D, as.matrix(params[11]), 0.3, 
as.matrix(params[13]))
+    dout_2D_leaky_relu = leaky_relu::backward(dout_2D_dropout, 
as.matrix(params[8]))
+    [dout_1D, dDW_2, dDb_2] = conv2d::backward(dout_2D_leaky_relu, 
as.scalar(params[9]), as.scalar(params[10]),
+                                               as.matrix(params[6]), DW_2, 
Db_2, 64, as.scalar(params[3]),
+                                               as.scalar(params[4]), HWf, HWf, 
stride, stride, pad, pad)
+
+    #Layer 1
+    dout_1D_dropout = dropout::backward(dout_1D, as.matrix(params[5]), 0.3, 
as.matrix(params[7]))
+    dout_1D_leaky_relu = leaky_relu::backward(dout_1D_dropout, 
as.matrix(params[2]))
+    [dX, dDW_1, dDb_1] = conv2d::backward(dout_1D_leaky_relu, 
as.scalar(params[3]), as.scalar(params[4]),
+                                          as.matrix(params[1]), DW_1, Db_1, 1, 
HWin, HWin, HWf, HWf, stride, stride,
+                                          pad, pad)
+
+    if(!lock)
+    {
+        #optimize
+        [DW_1, mDW_1, vDW_1] = adam::update(DW_1, dDW_1, lr, beta1, beta2, 
epsilon, iteration, mDW_1, vDW_1)
+        [Db_1, mDb_1, vDb_1] = adam::update(Db_1, dDb_1, lr, beta1, beta2, 
epsilon, iteration, mDb_1, vDb_1)
+        [DW_2, mDW_2, vDW_2] = adam::update(DW_2, dDW_2, lr, beta1, beta2, 
epsilon, iteration, mDW_2, vDW_2)
+        [Db_2, mDb_2, vDb_2] = adam::update(Db_2, dDb_2, lr, beta1, beta2, 
epsilon, iteration, mDb_2, vDb_2)
+        [DW_3, mDW_3, vDW_3] = adam::update(DW_3, dDW_3, lr, beta1, beta2, 
epsilon, iteration, mDW_3, vDW_3)
+        [Db_3, mDb_3, vDb_3] = adam::update(Db_3, dDb_3, lr, beta1, beta2, 
epsilon, iteration, mDb_3, vDb_3)
+
+        model = list(DW_1, Db_1, DW_2, Db_2, DW_3, Db_3)
+        gradients = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2, mDW_3, vDW_3, mDb_3, vDb_3)

Review comment:
       I'm not sure what the intention is, but this method returns the unchanged inputs if the lock is true.
   Are you sure you don't want to return the updated gradients in an else statement?
   Also, I don't think you need to pass the gradients into this function.
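
   For illustration, a minimal sketch of the kind of structure I mean (only one weight shown; hypothetical, not the full method):

```dml
if(!lock) {
    # weights are trainable: update parameters and adam state, then repack both lists
    [DW_1, mDW_1, vDW_1] = adam::update(DW_1, dDW_1, lr, beta1, beta2, epsilon, iteration, mDW_1, vDW_1)
    # ... same for the remaining weights and biases ...
    model = list(DW_1, Db_1, DW_2, Db_2, DW_3, Db_3)
    gradients = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2, mDW_3, vDW_3, mDb_3, vDb_3)
}
else {
    # lock == TRUE: make the pass-through explicit, or repack whatever should
    # actually be returned here (e.g. the updated adam state).
    model = model
    gradients = gradients
}
```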

##########
File path: src/test/scripts/applications/GAN/GAN_simple.dml
##########
@@ -0,0 +1,358 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("nn/layers/affine.dml") as affine
+source("nn/layers/leaky_relu.dml") as leaky_relu
+source("nn/layers/log_loss.dml") as log_loss
+source("nn/layers/softmax.dml") as softmax
+source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/tanh.dml") as tanh
+source("nn/optim/adam.dml") as adam
+
+train = function(matrix[double] X, int iterations)
+    return (matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, 
matrix[double] Gb_2, matrix[double] DW_1,
+            matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2)
+{
+/*
+   * Trains the generator and the discriminator of the GAN.
+   *
+   * The input matrix, X, has N examples, each with 784 features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, 784).
+   *  - iterations: number of iterations for training
+   *
+   * Outputs:
+   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 
128).
+   *  - Gb_1: Generator 1st layer biases vector, of shape (1, 128).
+   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (128, 
784).
+   *  - Gb_2: Generator 2st layer biases vector, of shape (1, 784).
+   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape 
(784, 128).
+   *  - Db_1: Discriminator 1st layer biases vector, of shape (1, 128).
+   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape 
(128, 1).
+   *  - Db_2: Discriminator 2nd layer biases vector, of shape (1, 1).
+*/
+    N = nrow(X)
+    batch_size = 128
+    half_batch = batch_size / 2
+
+    #Define Generator:
+    [GW_1, Gb_1] = affine::init(100, 128, -1)
+    [GW_2, Gb_2] = affine::init(128, 28*28, -1)
+    [mGW_1, vGW_1] = adam::init(GW_1)
+    [mGb_1, vGb_1] = adam::init(Gb_1)
+    [mGW_2, vGW_2] = adam::init(GW_2)
+    [mGb_2, vGb_2] = adam::init(Gb_2)
+
+    gen_model = list(GW_1, Gb_1, GW_2, Gb_2)
+    gen_grad = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2)
+
+    #Define Discriminator:
+    [DW_1, Db_1] = affine::init(28*28, 128, -1)
+    [DW_2, Db_2] = affine::init(128, 1, -1)
+    [mDW_1, vDW_1] = adam::init(DW_1)
+    [mDb_1, vDb_1] = adam::init(Db_1)
+    [mDW_2, vDW_2] = adam::init(DW_2)
+    [mDb_2, vDb_2] = adam::init(Db_2)
+
+    disc_model = list(DW_1, Db_1, DW_2, Db_2)
+    disc_grad = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2)
+
+    fake = matrix(0, 0, 784)
+
+    for(i in 1:iterations)
+    {
+        print('step ' + toString(i) + ' / ' + toString(iterations))
+        #generate samples
+        noise = rand(rows = half_batch, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model)
+        rand = sample(N, half_batch)
+        real_images = matrix(0, half_batch, 784)
+        for(r in 1:half_batch)
+        {
+            real_images[r,] = X[as.scalar(rand[r]),]
+        }
+
+        #train discriminator
+        [decision, disc_params] = disc_forward(real_images, disc_model)
+        targets = matrix(1, half_batch, 1)
+        dloss1 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, 
i, disc_model, disc_grad, disc_params)
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(0, half_batch, 1)
+        dloss2 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, 
i, disc_model, disc_grad, disc_params)
+        print('discriminator_loss: ' + toString((dloss1 + dloss2)))
+
+        #generate samples
+        noise = rand(rows = batch_size, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model)
+
+        #train generator
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(1, batch_size, 1)
+        gloss = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, 
i, disc_model, disc_grad, disc_params)
+        [gen_model, gen_grad] = gen_backward(dX, i, gen_model, gen_grad, 
gen_params)
+        print('generator_loss: ' + toString(gloss))
+
+        # get sample generated image to observe evolution of generated images
+        if(i %% (iterations/10) == 0)
+        {
+            fake = rbind(fake, fake_images[1])
+        }
+    }
+    out_dir = "target/testTemp/applications/GAN/GANTest/"
+    fake = 0.5 * fake + 0.5
+    write(fake, out_dir+"/evo")
+    DW_1 = as.matrix(disc_model[1])
+    Db_1 = as.matrix(disc_model[2])
+    DW_2 = as.matrix(disc_model[3])
+    Db_2 = as.matrix(disc_model[4])
+    GW_1 = as.matrix(gen_model[1])
+    Gb_1 = as.matrix(gen_model[2])
+    GW_2 = as.matrix(gen_model[3])
+    Gb_2 = as.matrix(gen_model[4])
+}
+
+gen_forward = function(matrix[double] noise, list[unknown] model)
+    return(matrix[double] images, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the generator.
+   * Generates fake images from input noise.
+   *
+   * Inputs:
+   *  - noise: Randomly generated noise, of shape (N, 100).
+   *  - model: List containing the generator weights and biases.
+   *
+   * Outputs:
+   *  - images: Generated images, of shape (N, 784).
+   *  - params: List of outputs of the generator layers, needed for backward 
pass.
+*/
+    GW_1 = as.matrix(model[1])
+    Gb_1 = as.matrix(model[2])
+    GW_2 = as.matrix(model[3])
+    Gb_2 = as.matrix(model[4])
+
+    #Generator forward:
+    #Layer 1
+    out_1G = affine::forward(noise, GW_1, Gb_1)
+    out_1G_leaky_relu = leaky_relu::forward(out_1G)
+    #Layer 2
+    out_2G = affine::forward(out_1G_leaky_relu, GW_2, Gb_2)
+    out_2G_tanh = tanh::forward(out_2G)
+    images = out_2G_tanh
+    params = list(noise, out_1G, out_1G_leaky_relu, out_2G)
+}
+
+disc_forward = function(matrix[double] X, list[unknown] model)
+    return(matrix[double] decision, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the discriminator.
+   * Decides if input images are real or fake.
+   *
+   * Inputs:
+   *  - X: Input matrix containing sample images, of shape (N, 784).
+   *  - model: List containing the discriminator weights and biases.
+   *
+   * Outputs:
+   *  - decision: Decisions for realness of input, of shape (N, 1).
+   *  - params: List of outputs of the discriminator layers, needed for 
backward pass.
+*/
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+
+    #Discriminator forward
+    #Layer 1
+    out_1D = affine::forward(X, DW_1, Db_1)
+    out_1D_leaky_relu = leaky_relu::forward(out_1D)
+
+    #Layer 2
+    out_2D = affine::forward(out_1D_leaky_relu, DW_2, Db_2)
+    decision = sigmoid::forward(out_2D)
+    params = list(X, out_1D, out_1D_leaky_relu, out_2D)
+}
+
+disc_backward = function(matrix[double] decision, matrix[double] targets, 
boolean lock, int iteration, list[unknown] model, list[unknown] gradients,
+                         list[unknown] params)
+    return(matrix[double] dX, list[unknown] model, list[unknown] gradients)
+{
+/*
+   * Computes the backward pass of the discriminator.
+   * Updates gradients and weights of the discriminator.
+   *
+   * Inputs:
+   *  - decisions: Input matrix containing discriminator decisions, of shape 
(N, 1).
+   *  - targets: Target values for the decisions, of shape (N, 1).
+   *  - lock: Boolean that governs if discriminator weights are to be updated, 
TRUE means the weights are not updated.
+   *  - iteration: Current iteration of the training.
+   *  - model: List containing the discriminator weights and biases.
+   *  - gradients: List containing the discriminator gradients.
+   *  - params: List of outputs of the discriminator layers from the forward 
pass.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, 784).
+   *  - model: List containing the updated discriminator weights and biases.
+   *  - gradients: List containing the updated discriminator gradients.
+*/
+    lr = 0.0002
+    beta1 = 0.5
+    beta2 = 0.999
+    epsilon = 1e-07
+
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+
+    mDW_1 = as.matrix(gradients[1])
+    vDW_1 = as.matrix(gradients[2])
+    mDb_1 = as.matrix(gradients[3])
+    vDb_1 = as.matrix(gradients[4])
+    mDW_2 = as.matrix(gradients[5])
+    vDW_2 = as.matrix(gradients[6])
+    mDb_2 = as.matrix(gradients[7])
+    vDb_2 = as.matrix(gradients[8])
+
+    #Discriminator backward
+    #Layer 2
+    dloss = log_loss::backward(decision, targets)
+    dout_2D = sigmoid::backward(dloss, as.matrix(params[4]))
+    [dout_1D, dDW_2, dDb_2] = affine::backward(dout_2D, as.matrix(params[3]), 
DW_2, Db_2)
+
+    #Layer 1
+    dout_1D_leaky_relu = leaky_relu::backward(dout_1D, as.matrix(params[2]))
+    [dX, dDW_1, dDb_1] = affine::backward(dout_1D_leaky_relu, 
as.matrix(params[1]), DW_1, Db_1)
+
+    if(!lock)
+    {
+        #optimize
+        [DW_1, mDW_1, vDW_1] = adam::update(DW_1, dDW_1, lr, beta1, beta2, 
epsilon, iteration, mDW_1, vDW_1)
+        [Db_1, mDb_1, vDb_1] = adam::update(Db_1, dDb_1, lr, beta1, beta2, 
epsilon, iteration, mDb_1, vDb_1)
+        [DW_2, mDW_2, vDW_2] = adam::update(DW_2, dDW_2, lr, beta1, beta2, 
epsilon, iteration, mDW_2, vDW_2)
+        [Db_2, mDb_2, vDb_2] = adam::update(Db_2, dDb_2, lr, beta1, beta2, 
epsilon, iteration, mDb_2, vDb_2)
+        model = list(DW_1, Db_1, DW_2, Db_2)
+        gradients = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, 
vDb_2)
+    }
+}
+
+gen_backward = function(matrix[double] dX, int iteration, list[unknown] model, list[unknown] gradients, list[unknown] params)

Review comment:
       Same as above: I don't think this function needs the gradients as an input.

##########
File path: src/test/scripts/applications/GAN/GAN_cnn.dml
##########
@@ -0,0 +1,510 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("nn/layers/affine.dml") as affine
+source("nn/layers/leaky_relu.dml") as leaky_relu
+source("nn/layers/conv2d_builtin.dml") as conv2d
+source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
+source("nn/layers/log_loss.dml") as log_loss
+source("nn/layers/dropout.dml") as dropout
+source("nn/layers/batch_norm1d.dml") as batch_norm_1d
+source("nn/layers/batch_norm2d.dml") as batch_norm_2d
+source("nn/layers/softmax.dml") as softmax
+source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/tanh.dml") as tanh
+source("nn/optim/adam.dml") as adam
+
+train = function(matrix[double] X, int iterations)
+    return (matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, 
matrix[double] Gb_2, matrix[double] GW_3,
+            matrix[double] Gb_3, matrix[double] GW_4, matrix[double] Gb_4, 
matrix[double] DW_1, matrix[double] Db_1,
+            matrix[double] DW_2, matrix[double] Db_2, matrix[double] DW_3, 
matrix[double] Db_3)
+{
+/*
+   * Trains the generator and the discriminator of the GAN.
+   *
+   * The input matrix, X, has N examples, each with 784 features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, 784).
+   *  - iterations: number of iterations for training
+   *
+   * Outputs:
+   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 
D).
+   *  - Gb_1: Generator 1st layer biases vector, of shape (1, D).
+   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (256, 
128*HWf*HWf).
+   *  - Gb_2: Generator 2nd layer biases vector, of shape (128, 1).
+   *  - GW_3: Generator 3rd layer weights (parameters) matrix, of shape (128, 
64*HWf*HWf).
+   *  - Gb_3: Generator 3rd layer biases vector, of shape (64, 1).
+   *  - GW_4: Generator 4th layer weights (parameters) matrix, of shape (64, 
1*HWf*HWf).
+   *  - Gb_4: Generator 4th layer biases vector, of shape (1, 1).
+   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape 
(64, 1*HWf*HWf).
+   *  - Db_1: Discriminator 1st layer biases vector, of shape (64, 1).
+   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape 
(128, 64*HWf*HWf).
+   *  - Db_2: Discriminator 2nd layer biases vector, of shape (128, 1).
+   *  - DW_3: Discriminator 3rd layer weights (parameters) matrix, of shape 
(6272, 1).
+   *  - Db_3: Discriminator 3rd layer biases vector, of shape (1, 1).
+*/
+    N = nrow(X)
+    batch_size = 128
+    half_batch = batch_size / 2
+    D = 7*7*256
+    HWf = 5
+
+    #Define Generator:
+    [GW_1, Gb_1] = affine::init(100, D, -1)
+    [GW_2, Gb_2] = conv2d_transpose::init(128, 256, HWf, HWf)
+    [GW_3, Gb_3] = conv2d_transpose::init(64, 128, HWf, HWf)
+    [GW_4, Gb_4] = conv2d_transpose::init(1, 64, HWf, HWf)
+    [mGW_1, vGW_1] = adam::init(GW_1)
+    [mGb_1, vGb_1] = adam::init(Gb_1)
+    [mGW_2, vGW_2] = adam::init(GW_2)
+    [mGb_2, vGb_2] = adam::init(Gb_2)
+    [mGW_3, vGW_3] = adam::init(GW_3)
+    [mGb_3, vGb_3] = adam::init(Gb_3)
+    [mGW_4, vGW_4] = adam::init(GW_4)
+    [mGb_4, vGb_4] = adam::init(Gb_4)
+
+    gen_model = list(GW_1, Gb_1, GW_2, Gb_2, GW_3, Gb_3, GW_4, Gb_4)
+    gen_grad = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2, 
mGW_3, vGW_3, mGb_3, vGb_3, mGW_4, vGW_4, mGb_4, vGb_4)
+
+    #Define Discriminator:
+    [DW_1, Db_1] = conv2d::init(64, 1, HWf, HWf, -1)
+    [DW_2, Db_2] = conv2d::init(128, 64, HWf, HWf, -1)
+    [DW_3, Db_3] = affine::init(6272, 1, -1)
+    [mDW_1, vDW_1] = adam::init(DW_1)
+    [mDb_1, vDb_1] = adam::init(Db_1)
+    [mDW_2, vDW_2] = adam::init(DW_2)
+    [mDb_2, vDb_2] = adam::init(Db_2)
+    [mDW_3, vDW_3] = adam::init(DW_3)
+    [mDb_3, vDb_3] = adam::init(Db_3)
+
+    disc_model = list(DW_1, Db_1, DW_2, Db_2, DW_3, Db_3)
+    disc_grad = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2, 
mDW_3, vDW_3, mDb_3, vDb_3)
+
+    fake = matrix(0, 0, 784)
+
+    for(i in 1:iterations)
+    {
+        print('step ' + toString(i) + ' / ' + toString(iterations))
+        #generate samples
+        noise = rand(rows = half_batch, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model, 'train')
+        rand = sample(N, half_batch)
+        real_images = matrix(0, half_batch, 784)
+        for(r in 1:half_batch)
+        {
+            real_images[r,] = X[as.scalar(rand[r]),]
+        }
+
+        #train discriminator
+        [decision, disc_params] = disc_forward(real_images, disc_model)
+        targets = matrix(1, half_batch, 1)
+        dloss1 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, 
i, disc_model, disc_grad, disc_params)
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(0, half_batch, 1)
+        dloss2 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, 
i, disc_model, disc_grad, disc_params)
+        print('discriminator_loss: ' + toString((dloss1 + dloss2)))
+
+        #generate samples
+        noise = rand(rows = batch_size, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model, 'train')
+
+        #train generator
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(1, batch_size, 1)
+        gloss = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, 
i, disc_model, disc_grad, disc_params)
+        [gen_model, gen_grad] = gen_backward(dX, i, gen_model, gen_grad, 
gen_params, 'train')
+        print('generator_loss: ' + toString(gloss))
+
+        # get sample generated image to observe evolution of generated images
+        if(i %% (iterations/10) == 0)
+        {
+            fake = rbind(fake, fake_images[1])
+        }
+    }
+    out_dir = "target/testTemp/applications/GAN/GANTest/"
+    fake = 0.5 * fake + 0.5
+    write(fake, out_dir+"/evo")
+    DW_1 = as.matrix(disc_model[1])
+    Db_1 = as.matrix(disc_model[2])
+    DW_2 = as.matrix(disc_model[3])
+    Db_2 = as.matrix(disc_model[4])
+    DW_3 = as.matrix(disc_model[5])
+    Db_3 = as.matrix(disc_model[6])
+    GW_1 = as.matrix(gen_model[1])
+    Gb_1 = as.matrix(gen_model[2])
+    GW_2 = as.matrix(gen_model[3])
+    Gb_2 = as.matrix(gen_model[4])
+    GW_3 = as.matrix(gen_model[5])
+    Gb_3 = as.matrix(gen_model[6])
+    GW_4 = as.matrix(gen_model[7])
+    Gb_4 = as.matrix(gen_model[8])
+}
+
+gen_forward = function(matrix[double] noise, list[unknown] model, String mode)
+    return(matrix[double] images, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the generator.
+   * Generates fake images from input noise.
+   *
+   * Inputs:
+   *  - noise: Randomly generated noise, of shape (N, 100).
+   *  - model: List containing the generator weights and biases.
+   *  - mode: 'train' or 'test' for batch normalization layers.
+   *
+   * Outputs:
+   *  - images: Generated images, of shape (N, 784).
+   *  - params: List of outputs of the generator layers, needed for backward 
pass.
+*/
+    D = 7*7*256
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    GW_1 = as.matrix(model[1])
+    Gb_1 = as.matrix(model[2])
+    GW_2 = as.matrix(model[3])
+    Gb_2 = as.matrix(model[4])
+    GW_3 = as.matrix(model[5])
+    Gb_3 = as.matrix(model[6])
+    GW_4 = as.matrix(model[7])
+    Gb_4 = as.matrix(model[8])
+
+    #Generator forward:
+    #Layer 1
+    out_1G = affine::forward(noise, GW_1, Gb_1)
+    [out_1G_batch_norm, ema_mean_upd_1, ema_var_upd_1, cache_mean_1, 
cache_var_1, cache_norm_1] = batch_norm_1d::forward(out_1G,
+                                                matrix(1,1,D), matrix(0,1,D), 
mode, matrix(0,1,D), matrix(1,1,D), 0.99, 0.001)
+    out_1G_leaky_relu = leaky_relu::forward(out_1G_batch_norm)
+    #Layer 2
+    [out_2G, hout_2G, wout_2G] = conv2d_transpose::forward(out_1G_leaky_relu, 
GW_2, Gb_2, 256, 7, 7, HWf, HWf, 1, 1,
+                                                                   pad, pad, 
0, 0)
+    [out_2G_batch_norm, ema_mean_upd_2, ema_var_upd_2, cache_mean_2, 
cache_inv_var_2] = batch_norm_2d::forward(out_2G,
+                matrix(1,128,1), matrix(0,128,1), 128, hout_2G, wout_2G, mode, 
matrix(0,128,1), matrix(1,128,1), 0.99, 0.001)
+    out_2G_leaky_relu = leaky_relu::forward(out_2G_batch_norm)
+
+    #Layer 3
+    [out_3G, hout_3G, wout_3G] = conv2d_transpose::forward(out_2G_leaky_relu, 
GW_3, Gb_3, 128, hout_2G, wout_2G, HWf,
+                                                                   HWf, 
stride, stride, pad, pad, 1, 1)
+    [out_3G_batch_norm, ema_mean_upd_3, ema_var_upd_3, cache_mean_3, 
cache_inv_var_3] = batch_norm_2d::forward(out_3G,
+                matrix(1,64,1), matrix(0,64,1), 64, hout_3G, wout_3G, mode, 
matrix(0,64,1), matrix(1,64,1), 0.99, 0.001)
+    out_3G_leaky_relu = leaky_relu::forward(out_3G_batch_norm)
+
+    #Output Layer
+    [out_4G, hout_4G, wout_4G] = conv2d_transpose::forward(out_3G_leaky_relu, 
GW_4, Gb_4, 64, hout_3G, wout_3G, HWf,
+                                                                   HWf, 
stride, stride, pad, pad, 1, 1)
+    out_4G_tanh = tanh::forward(out_4G)
+
+    images = out_4G_tanh
+    params = list(noise, out_1G, out_1G_batch_norm, ema_mean_upd_1, 
ema_var_upd_1, cache_mean_1, cache_var_1,
+                   cache_norm_1, out_1G_leaky_relu, out_2G, hout_2G, wout_2G, 
out_2G_batch_norm, cache_mean_2, cache_inv_var_2,
+                   out_2G_leaky_relu, out_3G, hout_3G, wout_3G, 
out_3G_batch_norm, cache_mean_3, cache_inv_var_3, out_3G_leaky_relu,
+                   out_4G, hout_4G, wout_4G)
+}
+
+disc_forward = function(matrix[double] X, list[unknown] model)
+    return(matrix[double] decision, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the discriminator.
+   * Decides if input images are real or fake.
+   *
+   * Inputs:
+   *  - X: Input matrix containing sample images, of shape (N, 784).
+   *  - model: List containing the discriminator weights and biases.
+   *
+   * Outputs:
+   *  - decision: Decisions for realness of input, of shape (N, 1).
+   *  - params: List of outputs of the discriminator layers, needed for 
backward pass.
+*/
+    HWin = 28
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+    DW_3 = as.matrix(model[5])
+    Db_3 = as.matrix(model[6])
+
+    #Discriminator forward
+    #Layer 1
+    [out_1D, hout_1D, wout_1D] = conv2d::forward(X, DW_1, Db_1, 1, HWin, HWin, 
HWf, HWf, stride, stride, pad, pad)
+    out_1D_leaky_relu = leaky_relu::forward(out_1D)
+    [out_1D_dropout, mask_1] = dropout::forward(out_1D_leaky_relu, 0.3, -1)
+
+    #Layer 2
+    [out_2D, hout_2D, wout_2D] = conv2d::forward(out_1D_dropout, DW_2, Db_2, 
64, hout_1D, wout_1D, HWf, HWf, stride,
+                                                         stride, pad, pad)
+    out_2D_leaky_relu = leaky_relu::forward(out_2D)
+    [out_2D_dropout, mask_2] = dropout::forward(out_2D_leaky_relu, 0.3, -1)
+
+    #Output Layer
+    out_3D = affine::forward(out_2D_dropout, DW_3, Db_3)
+    decision = sigmoid::forward(out_3D)
+    params = list(X, out_1D, hout_1D, wout_1D, out_1D_leaky_relu, 
out_1D_dropout, mask_1, out_2D, hout_2D, wout_2D,
+                  out_2D_leaky_relu, out_2D_dropout, mask_2, out_3D)
+}
+
+disc_backward = function(matrix[double] decision, matrix[double] targets, 
boolean lock, int iteration, list[unknown] model, list[unknown] gradients,
+                         list[unknown] params)
+    return(matrix[double] dX, list[unknown] model, list[unknown] gradients)
+{
+/*
+   * Computes the backward pass of the discriminator.
+   * Updates gradients and weights of the discriminator.
+   *
+   * Inputs:
+   *  - decisions: Input matrix containing discriminator decisions, of shape 
(N, 1).
+   *  - targets: Target values for the decisions, of shape (N, 1).
+   *  - lock: Boolean that governs if discriminator weights are to be updated, 
TRUE means the weights are not updated.
+   *  - iteration: Current iteration of the training.
+   *  - model: List containing the discriminator weights and biases.
+   *  - gradients: List containing the discriminator gradients.
+   *  - params: List of outputs of the discriminator layers from the forward 
pass.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, 784).
+   *  - model: List containing the updated discriminator weights and biases.
+   *  - gradients: List containing the updated discriminator gradients.
+*/
+    HWin = 28
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    lr = 0.0002
+    beta1 = 0.5
+    beta2 = 0.999
+    epsilon = 1e-07
+
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+    DW_3 = as.matrix(model[5])
+    Db_3 = as.matrix(model[6])
+
+    mDW_1 = as.matrix(gradients[1])
+    vDW_1 = as.matrix(gradients[2])
+    mDb_1 = as.matrix(gradients[3])
+    vDb_1 = as.matrix(gradients[4])
+    mDW_2 = as.matrix(gradients[5])
+    vDW_2 = as.matrix(gradients[6])
+    mDb_2 = as.matrix(gradients[7])
+    vDb_2 = as.matrix(gradients[8])
+    mDW_3 = as.matrix(gradients[9])
+    vDW_3 = as.matrix(gradients[10])
+    mDb_3 = as.matrix(gradients[11])
+    vDb_3 = as.matrix(gradients[12])
+
+    #Discriminator backward
+    #Output Layer
+    dloss = log_loss::backward(decision, targets)
+    dout_3D = sigmoid::backward(dloss, as.matrix(params[14]))
+    [dout_2D, dDW_3, dDb_3] = affine::backward(dout_3D, as.matrix(params[12]), 
DW_3, Db_3)
+
+    #Layer 2
+    dout_2D_dropout = dropout::backward(dout_2D, as.matrix(params[11]), 0.3, 
as.matrix(params[13]))
+    dout_2D_leaky_relu = leaky_relu::backward(dout_2D_dropout, 
as.matrix(params[8]))
+    [dout_1D, dDW_2, dDb_2] = conv2d::backward(dout_2D_leaky_relu, 
as.scalar(params[9]), as.scalar(params[10]),
+                                               as.matrix(params[6]), DW_2, 
Db_2, 64, as.scalar(params[3]),
+                                               as.scalar(params[4]), HWf, HWf, 
stride, stride, pad, pad)
+
+    #Layer 1
+    dout_1D_dropout = dropout::backward(dout_1D, as.matrix(params[5]), 0.3, 
as.matrix(params[7]))
+    dout_1D_leaky_relu = leaky_relu::backward(dout_1D_dropout, 
as.matrix(params[2]))
+    [dX, dDW_1, dDb_1] = conv2d::backward(dout_1D_leaky_relu, 
as.scalar(params[3]), as.scalar(params[4]),
+                                          as.matrix(params[1]), DW_1, Db_1, 1, 
HWin, HWin, HWf, HWf, stride, stride,
+                                          pad, pad)
+
+    if(!lock)
+    {
+        #optimize
+        [DW_1, mDW_1, vDW_1] = adam::update(DW_1, dDW_1, lr, beta1, beta2, 
epsilon, iteration, mDW_1, vDW_1)
+        [Db_1, mDb_1, vDb_1] = adam::update(Db_1, dDb_1, lr, beta1, beta2, 
epsilon, iteration, mDb_1, vDb_1)
+        [DW_2, mDW_2, vDW_2] = adam::update(DW_2, dDW_2, lr, beta1, beta2, 
epsilon, iteration, mDW_2, vDW_2)
+        [Db_2, mDb_2, vDb_2] = adam::update(Db_2, dDb_2, lr, beta1, beta2, 
epsilon, iteration, mDb_2, vDb_2)
+        [DW_3, mDW_3, vDW_3] = adam::update(DW_3, dDW_3, lr, beta1, beta2, 
epsilon, iteration, mDW_3, vDW_3)
+        [Db_3, mDb_3, vDb_3] = adam::update(Db_3, dDb_3, lr, beta1, beta2, 
epsilon, iteration, mDb_3, vDb_3)
+
+        model = list(DW_1, Db_1, DW_2, Db_2, DW_3, Db_3)
+        gradients = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, 
vDb_2, mDW_3, vDW_3, mDb_3, vDb_3)
+    }
+}
+
+gen_backward = function(matrix[double] dX, int iteration, list[unknown] model, list[unknown] gradients, list[unknown] params,

Review comment:
       Same here: don't pass the gradients as an argument.

##########
File path: src/test/java/org/apache/sysds/test/applications/GANTest.java
##########
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.applications;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.sysds.test.TestConfiguration;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+import org.apache.sysds.hops.OptimizerUtils;
+import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestUtils;
+
+@RunWith(value = Parameterized.class)
+@net.jcip.annotations.NotThreadSafe
+public class GANTest extends AutomatedTestBase
+{
+    protected final static String TEST_DIR = "applications/GAN/";
+    protected final static String TEST_NAME = "GAN_mnist";
+    protected String TEST_CLASS_DIR = TEST_DIR + GANTest.class.getSimpleName() 
+ "/";
+
+    protected int a_, b_;
+
+    public GANTest(int a, int b)
+    {
+
+    }
+
+    @Parameters
+    public static Collection<Object[]> data()
+    {
+        Object[][] data = new Object[][] { { 0, 0}};

Review comment:
       Make these real arguments, something like "cnn" and "simple", since those are the two models you have made.
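
   A rough sketch of what I have in mind (hypothetical field and constructor names, not tested):

```java
protected final String model_;

public GANTest(String model) {
    model_ = model;
}

@Parameters
public static Collection<Object[]> data() {
    // one test run per GAN variant
    Object[][] data = new Object[][] { {"cnn"}, {"simple"} };
    return Arrays.asList(data);
}
```

   testGAN() could then add model_ to proArgs instead of the hard-coded "cnn", and the unused int fields can be dropped.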

##########
File path: src/test/scripts/applications/GAN/GAN_simple.dml
##########
@@ -0,0 +1,358 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("nn/layers/affine.dml") as affine
+source("nn/layers/leaky_relu.dml") as leaky_relu
+source("nn/layers/log_loss.dml") as log_loss
+source("nn/layers/softmax.dml") as softmax
+source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/tanh.dml") as tanh
+source("nn/optim/adam.dml") as adam
+
+train = function(matrix[double] X, int iterations)
+    return (matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, 
matrix[double] Gb_2, matrix[double] DW_1,
+            matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2)
+{
+/*
+   * Trains the generator and the discriminator of the GAN.
+   *
+   * The input matrix, X, has N examples, each with 784 features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, 784).
+   *  - iterations: number of iterations for training
+   *
+   * Outputs:
+   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 
128).
+   *  - Gb_1: Generator 1st layer biases vector, of shape (1, 128).
+   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (128, 
784).
+   *  - Gb_2: Generator 2st layer biases vector, of shape (1, 784).
+   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape 
(784, 128).
+   *  - Db_1: Discriminator 1st layer biases vector, of shape (1, 128).
+   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape 
(128, 1).
+   *  - Db_2: Discriminator 2nd layer biases vector, of shape (1, 1).
+*/
+    N = nrow(X)
+    batch_size = 128
+    half_batch = batch_size / 2
+
+    #Define Generator:
+    [GW_1, Gb_1] = affine::init(100, 128, -1)
+    [GW_2, Gb_2] = affine::init(128, 28*28, -1)
+    [mGW_1, vGW_1] = adam::init(GW_1)
+    [mGb_1, vGb_1] = adam::init(Gb_1)
+    [mGW_2, vGW_2] = adam::init(GW_2)
+    [mGb_2, vGb_2] = adam::init(Gb_2)
+
+    gen_model = list(GW_1, Gb_1, GW_2, Gb_2)
+    gen_grad = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2)
+
+    #Define Discriminator:
+    [DW_1, Db_1] = affine::init(28*28, 128, -1)
+    [DW_2, Db_2] = affine::init(128, 1, -1)
+    [mDW_1, vDW_1] = adam::init(DW_1)
+    [mDb_1, vDb_1] = adam::init(Db_1)
+    [mDW_2, vDW_2] = adam::init(DW_2)
+    [mDb_2, vDb_2] = adam::init(Db_2)
+
+    disc_model = list(DW_1, Db_1, DW_2, Db_2)
+    disc_grad = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2)
+
+    fake = matrix(0, 0, 784)
+
+    for(i in 1:iterations)
+    {
+        print('step ' + toString(i) + ' / ' + toString(iterations))
+        #generate samples
+        noise = rand(rows = half_batch, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model)
+        rand = sample(N, half_batch)
+        real_images = matrix(0, half_batch, 784)
+        for(r in 1:half_batch)
+        {
+            real_images[r,] = X[as.scalar(rand[r]),]
+        }
+
+        #train discriminator
+        [decision, disc_params] = disc_forward(real_images, disc_model)
+        targets = matrix(1, half_batch, 1)
+        dloss1 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, 
i, disc_model, disc_grad, disc_params)
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(0, half_batch, 1)
+        dloss2 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, 
i, disc_model, disc_grad, disc_params)
+        print('discriminator_loss: ' + toString((dloss1 + dloss2)))
+
+        #generate samples
+        noise = rand(rows = batch_size, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model)
+
+        #train generator
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(1, batch_size, 1)
+        gloss = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, 
i, disc_model, disc_grad, disc_params)
+        [gen_model, gen_grad] = gen_backward(dX, i, gen_model, gen_grad, 
gen_params)
+        print('generator_loss: ' + toString(gloss))
+
+        # get sample generated image to observe evolution of generated images
+        if(i %% (iterations/10) == 0)
+        {
+            fake = rbind(fake, fake_images[1])
+        }
+    }
+    out_dir = "target/testTemp/applications/GAN/GANTest/"
+    fake = 0.5 * fake + 0.5
+    write(fake, out_dir+"/evo")
+    DW_1 = as.matrix(disc_model[1])
+    Db_1 = as.matrix(disc_model[2])
+    DW_2 = as.matrix(disc_model[3])
+    Db_2 = as.matrix(disc_model[4])
+    GW_1 = as.matrix(gen_model[1])
+    Gb_1 = as.matrix(gen_model[2])
+    GW_2 = as.matrix(gen_model[3])
+    Gb_2 = as.matrix(gen_model[4])
+}
+
+gen_forward = function(matrix[double] noise, list[unknown] model)
+    return(matrix[double] images, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the generator.
+   * Generates fake images from input noise.
+   *
+   * Inputs:
+   *  - noise: Randomly generated noise, of shape (N, 100).
+   *  - model: List containing the generator weights and biases.
+   *
+   * Outputs:
+   *  - images: Generated images, of shape (N, 784).
+   *  - params: List of outputs of the generator layers, needed for backward 
pass.
+*/
+    GW_1 = as.matrix(model[1])
+    Gb_1 = as.matrix(model[2])
+    GW_2 = as.matrix(model[3])
+    Gb_2 = as.matrix(model[4])
+
+    #Generator forward:
+    #Layer 1
+    out_1G = affine::forward(noise, GW_1, Gb_1)
+    out_1G_leaky_relu = leaky_relu::forward(out_1G)
+    #Layer 2
+    out_2G = affine::forward(out_1G_leaky_relu, GW_2, Gb_2)
+    out_2G_tanh = tanh::forward(out_2G)
+    images = out_2G_tanh
+    params = list(noise, out_1G, out_1G_leaky_relu, out_2G)
+}
+
+disc_forward = function(matrix[double] X, list[unknown] model)
+    return(matrix[double] decision, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the discriminator.
+   * Decides if input images are real or fake.
+   *
+   * Inputs:
+   *  - X: Input matrix containing sample images, of shape (N, 784).
+   *  - model: List containing the discriminator weights and biases.
+   *
+   * Outputs:
+   *  - decision: Decisions for realness of input, of shape (N, 1).
+   *  - params: List of outputs of the discriminator layers, needed for 
backward pass.
+*/
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+
+    #Discriminator forward
+    #Layer 1
+    out_1D = affine::forward(X, DW_1, Db_1)
+    out_1D_leaky_relu = leaky_relu::forward(out_1D)
+
+    #Layer 2
+    out_2D = affine::forward(out_1D_leaky_relu, DW_2, Db_2)
+    decision = sigmoid::forward(out_2D)
+    params = list(X, out_1D, out_1D_leaky_relu, out_2D)
+}
+
+disc_backward = function(matrix[double] decision, matrix[double] targets, boolean lock, int iteration, list[unknown] model, list[unknown] gradients,

Review comment:
       Same as above: no gradients input needed here.

##########
File path: src/test/java/org/apache/sysds/test/applications/GANTest.java
##########
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.applications;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.sysds.test.TestConfiguration;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+import org.apache.sysds.hops.OptimizerUtils;
+import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestUtils;
+
+@RunWith(value = Parameterized.class)
+@net.jcip.annotations.NotThreadSafe
+public class GANTest extends AutomatedTestBase
+{
+    protected final static String TEST_DIR = "applications/GAN/";
+    protected final static String TEST_NAME = "GAN_mnist";
+    protected String TEST_CLASS_DIR = TEST_DIR + GANTest.class.getSimpleName() 
+ "/";
+
+    protected int a_, b_;
+
+    public GANTest(int a, int b)
+    {
+
+    }
+
+    @Parameters
+    public static Collection<Object[]> data()
+    {
+        Object[][] data = new Object[][] { { 0, 0}};
+        return Arrays.asList(data);
+    }
+
+    @Override
+    public void setUp()
+    {
+        addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, 
TEST_NAME, new String[] {"accuracy.scalar"}));
+    }
+
+    @Test
+    public void testGAN()
+    {
+        System.out.println("Running GAN test");
+        getAndLoadTestConfiguration(TEST_NAME);
+        fullDMLScriptName = getScript();
+        List<String> proArgs = new ArrayList<>();
+        proArgs.add("-args");
+        proArgs.add("cnn");
+        //proArgs.add("simple");
+        proArgs.add(output(""));
+        programArgs = proArgs.toArray(new String[proArgs.size()]);
+        runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+        writeExpectedScalar("accuracy", 0.5);
+        compareResults( 0.15);
+    }
+}

Review comment:
       Add a trailing newline at the end of the file to make GitHub happy.

##########
File path: src/test/scripts/applications/GAN/doc.md
##########
@@ -0,0 +1,65 @@
+Simple GAN model:

Review comment:
       I would not include this file, since the actual script files already specify what is included.
   If you keep it, you need to add the license header at the top; see README.md for an example.
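
   If it stays, the header could look roughly like this at the top of doc.md (as an HTML comment so it does not render; please copy the exact wording from README.md):

```markdown
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->
```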




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@systemds.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

