[GitHub] marcoabreu closed pull request #11626: [MXNET-651] MXNet Model Backwards Compatibility Checker

GitBox Tue, 31 Jul 2018 02:50:18 -0700

marcoabreu closed pull request #11626: [MXNET-651] MXNet Model Backwards 
Compatibility Checker
URL: https://github.com/apache/incubator-mxnet/pull/11626


This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/ci/docker/install/ubuntu_python.sh 
b/ci/docker/install/ubuntu_python.sh
index f087f07091e..e71cac8a389 100755
--- a/ci/docker/install/ubuntu_python.sh
+++ b/ci/docker/install/ubuntu_python.sh
@@ -29,5 +29,5 @@ wget -nv https://bootstrap.pypa.io/get-pip.py
 python3 get-pip.py
 python2 get-pip.py
 
-pip2 install nose cpplint==1.3.0 pylint==1.8.3 'numpy<1.15.0,>=1.8.2' 
nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1
-pip3 install nose cpplint==1.3.0 pylint==1.8.3 'numpy<1.15.0,>=1.8.2' 
nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1
+pip2 install nose cpplint==1.3.0 pylint==1.8.3 'numpy<1.15.0,>=1.8.2' 
nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3
+pip3 install nose cpplint==1.3.0 pylint==1.8.3 'numpy<1.15.0,>=1.8.2' 
nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index a0795eb58a5..4fcde72faa3 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -895,6 +895,20 @@ nightly_test_javascript() {
     make -C /work/mxnet/amalgamation libmxnet_predict.js MIN=1 
EMCC=/work/deps/emscripten/emcc
 }
 
+#Tests Model backwards compatibility on MXNet
+nightly_model_backwards_compat_test() {
+    set -ex
+    export PYTHONPATH=/work/mxnet/python/
+    
./tests/nightly/model_backwards_compatibility_check/model_backward_compat_checker.sh
+}
+
+#Backfills S3 bucket with models trained on earlier versions of mxnet
+nightly_model_backwards_compat_train() {
+    set -ex
+    export PYTHONPATH=./python/
+    
./tests/nightly/model_backwards_compatibility_check/train_mxnet_legacy_models.sh
+}
+
 # Nightly 'MXNet: The Straight Dope' Single-GPU Tests
 nightly_straight_dope_python2_single_gpu_tests() {
     set -ex
diff --git 
a/tests/nightly/model_backwards_compatibility_check/JenkinsfileForMBCC 
b/tests/nightly/model_backwards_compatibility_check/JenkinsfileForMBCC
new file mode 100644
index 00000000000..412d68d56ff
--- /dev/null
+++ b/tests/nightly/model_backwards_compatibility_check/JenkinsfileForMBCC
@@ -0,0 +1,120 @@
+// -*- mode: groovy -*-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+//This is a Jenkinsfile for the model backwards compatibility checker. The 
format and some functions have been picked up from the top-level Jenkinsfile.
+
+err = null
+mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 
3rdparty/tvm/nnvm/lib/libnnvm.a'
+
+// Check out the job's SCM revision into an emptied workspace and initialise
+// all git submodules. The fetch is attempted up to 5 times, and each attempt
+// is bounded by a 15 minute timeout.
+def init_git() {
+  deleteDir()
+  retry(5) {
+    try {
+      timeout(time: 15, unit: 'MINUTES') {
+        checkout scm
+        sh 'git submodule update --init --recursive'
+        sh 'git clean -d -f'
+      }
+    } catch (exc) {
+      // Wipe the partially fetched tree so the next attempt starts clean.
+      deleteDir()
+      // Pause before failing this attempt: `error` throws, so a sleep placed
+      // after it never runs and the retries would fire back-to-back.
+      sleep 2
+      error "Failed to fetch source codes with ${exc}"
+    }
+  }
+}
+
+// pack libraries for later use
+def pack_lib(name, libs=mx_lib) {
+  sh """
+echo "Packing ${libs} into ${name}"
+echo ${libs} | sed -e 's/,/ /g' | xargs md5sum
+"""
+  stash includes: libs, name: name
+}
+
+// unpack libraries saved before
+def unpack_lib(name, libs=mx_lib) {
+  unstash name
+  sh """
+echo "Unpacked ${libs} from ${name}"
+echo ${libs} | sed -e 's/,/ /g' | xargs md5sum
+"""
+}
+
+def docker_run(platform, function_name, use_nvidia, shared_mem = '500m') {
+  def command = "ci/build.py --docker-registry ${env.DOCKER_CACHE_REGISTRY} 
%USE_NVIDIA% --platform %PLATFORM% --shm-size %SHARED_MEM% 
/work/runtime_functions.sh %FUNCTION_NAME%"
+  command = command.replaceAll('%USE_NVIDIA%', use_nvidia ? '--nvidiadocker' : 
'')
+  command = command.replaceAll('%PLATFORM%', platform)
+  command = command.replaceAll('%FUNCTION_NAME%', function_name)
+  command = command.replaceAll('%SHARED_MEM%', shared_mem)
+
+  sh command
+}
+
+try {
+  stage('MBCC Train'){
+    node('restricted-mxnetlinux-cpu') {
+      ws('workspace/modelBackwardsCompat') {
+        init_git()
+        // Train models on older versions
+        docker_run('ubuntu_nightly_cpu', 
'nightly_model_backwards_compat_train', false)
+        // upload files to S3 here outside of the docker environment
+        sh 
"./tests/nightly/model_backwards_compatibility_check/upload_models_to_s3.sh"
+      }
+    }
+  }
+
+  stage('MXNet Build'){
+    node('restricted-mxnetlinux-cpu') {
+      ws('workspace/build-cpu') {
+        init_git()
+        docker_run('ubuntu_cpu','build_ubuntu_cpu', false)
+        pack_lib('cpu', mx_lib)
+      }
+    }
+  }
+
+  stage('MBCC Inference'){
+    node('restricted-mxnetlinux-cpu') {
+      ws('workspace/modelBackwardsCompat') {
+        init_git()
+        unpack_lib('cpu', mx_lib)
+        // Perform inference on the latest version of MXNet
+        docker_run('ubuntu_nightly_cpu', 
'nightly_model_backwards_compat_test', false)
+      }
+    }
+  }
+} catch (caughtError) {
+  node("restricted-mxnetlinux-cpu") {
+    sh "echo caught ${caughtError}"
+    err = caughtError
+    currentBuild.result = "FAILURE"
+  }
+} finally {
+  node("restricted-mxnetlinux-cpu") {
+    // Only send email if model backwards compat test failed
+    if (currentBuild.result == "FAILURE") {
+       emailext body: 'Nightly tests for model backwards compatibity on MXNet 
branch : ${BRANCH_NAME} failed. Please view the build at ${BUILD_URL}', 
replyTo: '${EMAIL}', subject: '[MODEL BACKWARDS COMPATIBILITY TEST FAILED] 
build ${BUILD_NUMBER}', to: '${EMAIL}'
+    }
+    // Remember to rethrow so the build is marked as failing
+    if (err) {
+      throw err
+    }
+  }
+}
diff --git a/tests/nightly/model_backwards_compatibility_check/README.md 
b/tests/nightly/model_backwards_compatibility_check/README.md
new file mode 100644
index 00000000000..7a2116ac564
--- /dev/null
+++ b/tests/nightly/model_backwards_compatibility_check/README.md
@@ -0,0 +1,25 @@
+# Model Backwards Compatibility Tests
+
+This folder contains the scripts that are required to run the nightly job of 
verifying the compatibility and inference results of models (trained on earlier 
versions of MXNet) when loaded on the latest release candidate. The tests flag 
if:
+- The models fail to load on the latest version of MXNet.
+- The inference results are different. 
+
+ 
+## JenkinsfileForMBCC
+This is the configuration file for the Jenkins job.
+
+## Details 
+- Currently the APIs that are covered for model saving/loading are: 
do_checkpoint/load_checkpoint, save_params/load_params, 
save_parameters/load_parameters (added from v1.2.1 onwards), 
export/gluon.SymbolBlock.imports. 
+- These APIs are covered over models with architectures such as: MLP, RNNs, 
LeNet, LSTMs, covering the four save/load API pairs listed above.
+- More operators/models will be added in the future to extend the operator 
coverage. 
+- The model train file is suffixed by `_train.py` and the trained models are 
hosted in AWS S3.
+- For now, the trained models are backfilled into S3 for every MXNet 
release starting from v1.1.0 via the `train_mxnet_legacy_models.sh` script. 
+- `train_mxnet_legacy_models.sh` script checks out the previous two releases 
using git tag command and trains and uploads models to S3 on those MXNet 
versions.
+- The S3 bucket's folder structure looks like this : 
+    * 1.1.0/<model-1-files>  1.1.0/<model-2-files> 
+    * 1.2.0/<model-1-files> 1.2.0/<model-2-files>
+- The <model-1-files> is also a folder which contains the trained model symbol 
definitions, toy datasets it was trained on, weights and parameters of the 
model and other relevant files required to reload the model.
+- Over a period of time, the training script would have accumulated a 
repository of models trained over several versions of MXNet (both major and 
minor releases).
+- The inference part is checked via the script 
`model_backward_compat_checker.sh`, which runs `model_backwards_compat_inference.py`.
+- The inference script scans the S3 bucket for MXNet version folders as 
described above and runs the inference code for each model folder found.
+
diff --git a/tests/nightly/model_backwards_compatibility_check/common.py 
b/tests/nightly/model_backwards_compatibility_check/common.py
new file mode 100644
index 00000000000..4c61cc4e326
--- /dev/null
+++ b/tests/nightly/model_backwards_compatibility_check/common.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import boto3
+import mxnet as mx
+import os
+import numpy as np
+import logging
+from mxnet import gluon
+import mxnet.ndarray as F
+from mxnet.gluon import nn
+import re
+from mxnet.test_utils import assert_almost_equal
+
+# Set fixed random seeds.
+mx.random.seed(7)
+np.random.seed(7)
+logging.basicConfig(level=logging.INFO)
+
+# get the current mxnet version we are running on
+mxnet_version = mx.__version__
+model_bucket_name = 'mxnet-ci-prod-backwards-compatibility-models'
+data_folder = 'mxnet-model-backwards-compatibility-data'
+backslash = '/'
+s3 = boto3.resource('s3')
+ctx = mx.cpu(0)
+
+
+def get_model_path(model_name):
+    return os.path.join(os.getcwd(), 'models', str(mxnet_version), model_name)
+
+
+def get_module_api_model_definition():
+    input = mx.symbol.Variable('data')
+    input = mx.symbol.Flatten(data=input)
+
+    fc1 = mx.symbol.FullyConnected(data=input, name='fc1', num_hidden=128)
+    act1 = mx.sym.Activation(data=fc1, name='relu1', act_type="relu")
+    fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=2)
+    op = mx.symbol.SoftmaxOutput(data=fc2, name='softmax')
+    model = mx.mod.Module(symbol=op, context=ctx, data_names=['data'], 
label_names=['softmax_label'])
+    return model
+
+
+def save_inference_results(inference_results, model_name):
+    assert (isinstance(inference_results, mx.ndarray.ndarray.NDArray))
+    save_path = os.path.join(get_model_path(model_name), ''.join([model_name, 
'-inference']))
+
+    mx.nd.save(save_path, {'inference': inference_results})
+
+
+def load_inference_results(model_name):
+    inf_dict = mx.nd.load(model_name+'-inference')
+    return inf_dict['inference']
+
+
+def save_data_and_labels(test_data, test_labels, model_name):
+    assert (isinstance(test_data, mx.ndarray.ndarray.NDArray))
+    assert (isinstance(test_labels, mx.ndarray.ndarray.NDArray))
+
+    save_path = os.path.join(get_model_path(model_name), ''.join([model_name, 
'-data']))
+    mx.nd.save(save_path, {'data': test_data, 'labels': test_labels})
+
+
+def clean_model_files(files, model_name):
+    files.append(model_name + '-inference')
+    files.append(model_name + '-data')
+
+    for file in files:
+        if os.path.isfile(file):
+            os.remove(file)
+
+
+def download_model_files_from_s3(model_name, folder_name):
+    model_files = list()
+    bucket = s3.Bucket(model_bucket_name)
+    prefix = folder_name + backslash + model_name
+    model_files_meta = list(bucket.objects.filter(Prefix = prefix))
+    if len(model_files_meta) == 0:
+        logging.error('No trained models found under path : %s', prefix)
+        return model_files
+    for obj in model_files_meta:
+        file_name = obj.key.split('/')[2]
+        model_files.append(file_name)
+        # Download this file
+        bucket.download_file(obj.key, file_name)
+
+    return model_files
+
+
+def get_top_level_folders_in_bucket(s3client, bucket_name):
+    # This function returns the top level folders in the S3Bucket.
+    # These folders help us to navigate to the trained model files stored for 
different MXNet versions.
+    bucket = s3client.Bucket(bucket_name)
+    result = bucket.meta.client.list_objects(Bucket=bucket.name, 
Delimiter=backslash)
+    folder_list = list()
+    if 'CommonPrefixes' not in result:
+        logging.error('No trained models found in S3 bucket : %s for this 
file. '
+                      'Please train the models and run inference again' % 
bucket_name)
+        raise Exception("No trained models found in S3 bucket : %s for this 
file. "
+                        "Please train the models and run inference again" % 
bucket_name)
+        return folder_list
+    for obj in result['CommonPrefixes']:
+        folder_name = obj['Prefix'].strip(backslash)
+        # We only compare models from the same major versions. i.e. 1.x.x 
compared with latest 1.y.y etc
+        if str(folder_name).split('.')[0] != str(mxnet_version).split('.')[0]:
+            continue
+        # The top level folders contain MXNet Version # for trained models. 
Skipping the data folder here
+        if folder_name == data_folder:
+            continue
+        folder_list.append(obj['Prefix'].strip(backslash))
+
+    if len(folder_list) == 0:
+        logging.error('No trained models found in S3 bucket : %s for this 
file. '
+                      'Please train the models and run inference again' % 
bucket_name)
+        raise Exception("No trained models found in S3 bucket : %s for this 
file. "
+                        "Please train the models and run inference again" % 
bucket_name)
+    return folder_list
+
+
+def create_model_folder(model_name):
+    path = get_model_path(model_name)
+    if not os.path.exists(path):
+        os.makedirs(path)
+
+
+class Net(gluon.Block):
+    def __init__(self, **kwargs):
+        super(Net, self).__init__(**kwargs)
+        with self.name_scope():
+            # layers created in name_scope will inherit name space
+            # from parent layer.
+            self.conv1 = nn.Conv2D(20, kernel_size=(5, 5))
+            self.pool1 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
+            self.conv2 = nn.Conv2D(50, kernel_size=(5, 5))
+            self.pool2 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
+            self.fc1 = nn.Dense(500)
+            self.fc2 = nn.Dense(2)
+
+    def forward(self, x):
+        x = self.pool1(F.tanh(self.conv1(x)))
+        x = self.pool2(F.tanh(self.conv2(x)))
+        # 0 means copy over size from corresponding dimension.
+        # -1 means infer size from the rest of dimensions.
+        x = x.reshape((0, -1))
+        x = F.tanh(self.fc1(x))
+        x = F.tanh(self.fc2(x))
+        return x
+
+
+class HybridNet(gluon.HybridBlock):
+    def __init__(self, **kwargs):
+        super(HybridNet, self).__init__(**kwargs)
+        with self.name_scope():
+            # layers created in name_scope will inherit name space
+            # from parent layer.
+            self.conv1 = nn.Conv2D(20, kernel_size=(5, 5))
+            self.pool1 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
+            self.conv2 = nn.Conv2D(50, kernel_size=(5, 5))
+            self.pool2 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
+            self.fc1 = nn.Dense(500)
+            self.fc2 = nn.Dense(2)
+
+    def hybrid_forward(self, F, x):
+        x = self.pool1(F.tanh(self.conv1(x)))
+        x = self.pool2(F.tanh(self.conv2(x)))
+        # 0 means copy over size from corresponding dimension.
+        # -1 means infer size from the rest of dimensions.
+        x = x.reshape((0, -1))
+        x = F.tanh(self.fc1(x))
+        x = F.tanh(self.fc2(x))
+        return x
+
+
+class SimpleLSTMModel(gluon.Block):
+    def __init__(self, **kwargs):
+        super(SimpleLSTMModel, self).__init__(**kwargs)
+        with self.name_scope():
+            self.model = mx.gluon.nn.Sequential(prefix='')
+            with self.model.name_scope():
+                self.model.add(mx.gluon.nn.Embedding(30, 10))
+                self.model.add(mx.gluon.rnn.LSTM(20))
+                self.model.add(mx.gluon.nn.Dense(100))
+                self.model.add(mx.gluon.nn.Dropout(0.5))
+                self.model.add(mx.gluon.nn.Dense(2, flatten=True, 
activation='tanh'))
+
+    def forward(self, x):
+        return self.model(x)
+
+
+def compare_versions(version1, version2):
+    '''
+    
https://stackoverflow.com/questions/1714027/version-number-comparison-in-python
+    '''
+    def normalize(v):
+        return [int(x) for x in re.sub(r'(\.0+)*$','', v).split(".")]
+    return cmp(normalize(version1), normalize(version2))
diff --git 
a/tests/nightly/model_backwards_compatibility_check/model_backward_compat_checker.sh
 
b/tests/nightly/model_backwards_compatibility_check/model_backward_compat_checker.sh
new file mode 100755
index 00000000000..23386836ed8
--- /dev/null
+++ 
b/tests/nightly/model_backwards_compatibility_check/model_backward_compat_checker.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#Author: Piyush Ghai
+
+set -ex
+
+echo "Invoking model_backwards_compat_checker.sh script"
+echo `pwd`
+cd tests/nightly/model_backwards_compatibility_check
+echo `pwd`
+
+echo '=========================='
+python model_backwards_compat_inference.py
\ No newline at end of file
diff --git 
a/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py
 
b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py
new file mode 100644
index 00000000000..ae368e3a0fc
--- /dev/null
+++ 
b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from common import *
+
+
+def test_module_checkpoint_api():
+    model_name = 'module_checkpoint_api'
+    print ('Performing inference for model/API %s' % model_name)
+
+    # For each MXNet version that has the saved models
+    for folder in get_top_level_folders_in_bucket(s3, model_bucket_name):
+        logging.info('Fetching files for MXNet version : %s and model %s' % 
(folder, model_name))
+        model_files = download_model_files_from_s3(model_name, folder)
+        if len(model_files) == 0:
+            logging.warn('No training files found for %s for MXNet version : 
%s' % (model_name, folder))
+            continue
+
+        data = mx.nd.load(''.join([model_name, '-data']))
+        data_iter = mx.io.NDArrayIter(data['data'], data['labels'], 
batch_size=10)
+        # Load the model and perform inference
+        loaded_model = get_module_api_model_definition()
+
+        sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 1)
+        loaded_model.bind(data_shapes=data_iter.provide_data, 
label_shapes=data_iter.provide_label)
+        loaded_model.set_params(arg_params, aux_params)
+
+        old_inference_results = load_inference_results(model_name)
+        inference_results = loaded_model.predict(data_iter)
+        # Check whether they are equal or not ?
+        assert_almost_equal(inference_results.asnumpy(), 
old_inference_results.asnumpy())
+        clean_model_files(model_files, model_name)
+        logging.info('=================================')
+
+    logging.info('Assertion passed for model : %s' % model_name)
+
+
+def test_lenet_gluon_load_params_api():
+    model_name = 'lenet_gluon_save_params_api'
+    logging.info('Performing inference for model/API %s' % model_name)
+
+    for folder in get_top_level_folders_in_bucket(s3, model_bucket_name):
+        logging.info('Fetching files for MXNet version : %s and model %s' % 
(folder, model_name))
+        model_files = download_model_files_from_s3(model_name, folder)
+        if len(model_files) == 0:
+            logging.warn('No training files found for %s for MXNet version : 
%s' % (model_name, folder))
+            continue
+
+        data = mx.nd.load(''.join([model_name, '-data']))
+        test_data = data['data']
+        # Load the model and perform inference
+        loaded_model = Net()
+        loaded_model.load_params(model_name + '-params')
+        output = loaded_model(test_data)
+        old_inference_results = mx.nd.load(model_name + 
'-inference')['inference']
+        assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy())
+        clean_model_files(model_files, model_name)
+        logging.info('=================================')
+    logging.info('Assertion passed for model : %s' % model_name)
+
+
+def test_lenet_gluon_hybrid_imports_api():
+    model_name = 'lenet_gluon_hybrid_export_api'
+    logging.info('Performing inference for model/API %s' % model_name)
+
+    for folder in get_top_level_folders_in_bucket(s3, model_bucket_name):
+        logging.info('Fetching files for MXNet version : %s and model %s' % 
(folder, model_name))
+        model_files = download_model_files_from_s3(model_name, folder)
+        if len(model_files) == 0:
+            logging.warn('No training files found for %s for MXNet version : 
%s' % (model_name, folder))
+            continue
+            # Load the model and perform inference
+        data = mx.nd.load(''.join([model_name, '-data']))
+        test_data = data['data']
+        loaded_model = HybridNet()
+        loaded_model = gluon.SymbolBlock.imports(model_name + '-symbol.json', 
['data'], model_name + '-0000.params')
+        output = loaded_model(test_data)
+        old_inference_results = mx.nd.load(model_name + 
'-inference')['inference']
+        assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy())
+        clean_model_files(model_files, model_name)
+        logging.info('=================================')
+    logging.info('Assertion passed for model : %s' % model_name)
+
+
+def test_lstm_gluon_load_parameters_api():
+    # Run this test only on MXNet >= 1.2.1 (the version the check below
+    # compares against), since it uses the save_parameters and
+    # load_parameters API
+
+    if compare_versions(str(mxnet_version), '1.2.1') < 0:
+        logging.warn('Found MXNet version %s and exiting because this version 
does not contain save_parameters'
+                     ' and load_parameters functions' % str(mxnet_version))
+        return
+
+    model_name = 'lstm_gluon_save_parameters_api'
+    logging.info('Performing inference for model/API %s and model' % 
model_name)
+
+    # One iteration per MXNet version folder found in the bucket
+    for folder in get_top_level_folders_in_bucket(s3, model_bucket_name):
+        logging.info('Fetching files for MXNet version : %s' % folder)
+        model_files = download_model_files_from_s3(model_name, folder)
+        if len(model_files) == 0:
+            logging.warn('No training files found for %s for MXNet version : 
%s' % (model_name, folder))
+            continue
+
+        data = mx.nd.load(''.join([model_name, '-data']))
+        test_data = data['data']
+        # Load the model and perform inference
+        loaded_model = SimpleLSTMModel()
+        loaded_model.load_parameters(model_name + '-params')
+        output = loaded_model(test_data)
+        # Compare against the inference results saved alongside the model
+        old_inference_results = mx.nd.load(model_name + 
'-inference')['inference']
+        assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy())
+        # Remove the downloaded files before moving on to the next folder
+        clean_model_files(model_files, model_name)
+        logging.info('=================================')
+    logging.info('Assertion passed for model : %s' % model_name)
+
+
+if __name__ == '__main__':
+    test_module_checkpoint_api()
+    test_lenet_gluon_load_params_api()
+    test_lenet_gluon_hybrid_imports_api()
+    test_lstm_gluon_load_parameters_api()
diff --git 
a/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_train.py
 
b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_train.py
new file mode 100644
index 00000000000..289d47c705d
--- /dev/null
+++ 
b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_train.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from common import *
+
+
+def train_module_checkpoint_api():
+    model_name = 'module_checkpoint_api'
+    create_model_folder(model_name)
+    logging.info('Saving files for model %s' % model_name)
+    # Prepare data
+    test_data = mx.nd.array(np.random.uniform(-1, 1, size=(20, 1)))
+    test_label = mx.nd.array(np.random.randint(0, 2, size=(20,)), 
dtype='float32')
+    data_iter = mx.io.NDArrayIter(test_data, test_label, batch_size=10)
+
+    mod = get_module_api_model_definition()
+    mod.bind(data_shapes=data_iter.provide_data, 
label_shapes=data_iter.provide_label)
+    weights = mx.initializer.Xavier(magnitude=2.57)
+    mod.init_params(weights)
+
+    mod.save_checkpoint(os.path.join(get_model_path(model_name), model_name), 
1)
+
+    inference_results = mod.predict(data_iter)
+    # Save inference_results
+    # Save the model files
+    save_data_and_labels(test_data, test_label, model_name)
+    save_inference_results(inference_results, model_name)
+
+
+def train_lenet_gluon_save_params_api():
+    model_name = 'lenet_gluon_save_params_api'
+    create_model_folder(model_name)
+    logging.info('Saving files for model %s' % model_name)
+    net = Net()
+    weights = mx.initializer.Xavier(magnitude=2.57)
+    net.initialize(weights, ctx=[mx.cpu(0)])
+    # Prepare data
+
+    test_data = mx.nd.array(np.random.uniform(-1, 1, size=(20, 1, 30, 30)))
+    output = net(test_data)
+    # print (y)
+
+    mx.nd.save(os.path.join(get_model_path(model_name), ''.join([model_name, 
'-data'])), {'data': test_data})
+    save_inference_results(output, model_name)
+    net.save_params(os.path.join(get_model_path(model_name), 
''.join([model_name, '-params'])))
+
+
+def train_lenet_gluon_hybrid_export_api():
+    model_name = 'lenet_gluon_hybrid_export_api'
+    logging.info('Saving files for model %s' % model_name)
+    create_model_folder(model_name)
+    net = HybridNet()
+    weights = mx.initializer.Xavier(magnitude=2.57)
+    net.initialize(weights, ctx=[mx.cpu(0)])
+    net.hybridize()
+    # Prepare data
+    test_data = mx.nd.array(np.random.uniform(-1, 1, size=(20, 1, 30, 30)))
+    output = net(test_data)
+    # print (y)
+    # Save the test data as well.
+    # Save the inference output ys
+    # Save the model params
+
+    mx.nd.save(os.path.join(get_model_path(model_name), ''.join([model_name, 
'-data'])), {'data': test_data})
+    save_inference_results(output, model_name)
+    if compare_versions(str(mxnet_version) , '1.1.0') < 0:
+        # v1.0.0 does not have the epoch param in the .exports API. Hence 
adding this safety net
+        net.export(os.path.join(get_model_path(model_name), model_name))
+    else:
+        # Saving with 0 since by default on 1.0.0 it was saved with 0, so 
simplifying things
+        net.export(os.path.join(get_model_path(model_name), model_name), 
epoch=0)
+
+
+
+def train_lstm_gluon_save_parameters_api():
+    # If this code is being run on version >= 1.2.1 only then execute it,
+    # since it uses save_parameters and load_parameters API
+    if compare_versions(str(mxnet_version), '1.2.1') < 0:
+        logging.warn('Found MXNet version %s and exiting because this version 
does not contain save_parameters'
+                     ' and load_parameters functions' % str(mxnet_version))
+        return
+
+    model_name = 'lstm_gluon_save_parameters_api'
+    logging.info('Saving files for model %s' % model_name)
+    create_model_folder(model_name)
+    net = SimpleLSTMModel()
+    weights = mx.initializer.Xavier(magnitude=2.57)
+    net.initialize(weights, ctx=[mx.cpu(0)])
+
+    test_data = mx.nd.array(np.random.uniform(-1, 1, size=(10, 30)))
+    output = net(test_data)
+    # print output
+    mx.nd.save(os.path.join(get_model_path(model_name), ''.join([model_name, 
'-data'])), {'data': test_data})
+    save_inference_results(output, model_name)
+    net.save_parameters(os.path.join(get_model_path(model_name), 
''.join([model_name, '-params'])))
+
+
+def create_root_folder():
+    base_path = os.getcwd()
+    version_path = os.path.join(base_path, 'models')
+    if not os.path.exists(version_path):
+        os.mkdir(version_path)
+
+
+if __name__ == '__main__':
+    create_root_folder()
+
+    train_module_checkpoint_api()
+    train_lenet_gluon_save_params_api()
+    train_lenet_gluon_hybrid_export_api()
+    train_lstm_gluon_save_parameters_api()
diff --git 
a/tests/nightly/model_backwards_compatibility_check/train_mxnet_legacy_models.sh
 
b/tests/nightly/model_backwards_compatibility_check/train_mxnet_legacy_models.sh
new file mode 100755
index 00000000000..336c61df24f
--- /dev/null
+++ 
b/tests/nightly/model_backwards_compatibility_check/train_mxnet_legacy_models.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#Author: Piyush Ghai
+
+set -ex
+
+run_models() {
+       echo '=========================='
+       echo "Running training files and preparing models"
+       echo '=========================='
+       python model_backwards_compat_train.py
+       echo '=========================='
+}
+
+install_mxnet() {
+       version=$1
+       echo "Installing MXNet "$version
+       pip install mxnet==$version --user
+}
+
+## Cuts the string and gives only the major version part.
+## eg : 12.3.0 ---> 12
+get_major_version() {
+    major=$(echo $1 | cut -d. -f1)
+    echo $major
+}
+
+## Read the current version string from libinfo.py and extract the major version from it.
+curr_mxnet_version=$(grep -w "__version__" python/mxnet/libinfo.py | grep -o 
'".*"' | sed 's/"//g')
+## Expected in <numeric>.<numeric>.<numeric> format
+if [[ $curr_mxnet_version = 
[[:digit:][[:digit:]]*.[[:digit:][[:digit:]]*.[[:digit:][[:digit:]]* ]]
+then
+    curr_major_version=$(get_major_version $curr_mxnet_version)
+else
+    echo "The current major version does not comply with the regex expected. 
Exiting here."
+    exit 1
+fi
+
+echo `pwd`
+cd tests/nightly/model_backwards_compatibility_check
+echo `pwd`
+
+## Fetch the release tags, filtering out release candidates ('rc'); other irrelevant tags are filtered out in the loop below.
+## This list is sorted chronologically in descending order.
+## Sample output of the git tag command below: 1.2.0 utils 1.1.0 1.0.0 0.12.1
+## From this sample, we pick up all the versions whose major version matches the current major version.
+## While performing inference the latest version could be 1.3.0, which helps in validating models trained
+## on 1.1.0 and 1.2.0 by loading them on the latest version (1.3.0).
+## Over time, the model repository will grow, since with every new release we
+## also upload models trained on the newer versions through this script.
+previous_versions=($(git tag --sort=-creatordate | grep --invert-match rc))
+count=0
+for version in ${previous_versions[*]}
+do
+       ## Proceed only if the tag starts with a numeric major version, with a wildcard match for the minor version numbers.
+       ## [[:digit:]]+. could have been used as well, but it did not work as a traditional regex in bash,
+       ## so we had to resort to [[:digit:]][[:digit:]]* to express a multi-digit version match.
+       ## Example: previous_versions=(12.0.0 12.12.0 12.12.12 2.0.0 1.0.4 1.2.0 v.12.0.0 beta.12.0.1)
+       ## When passed through the pattern, the output is: [12.0.0 12.12.0 12.12.12 2.0.0 1.0.4 1.2.0]
+       if [[ $version = 
[[:digit:][[:digit:]]*.[[:digit:][[:digit:]]*.[[:digit:][[:digit:]]* ]]
+       then
+#          echo $version
+           major_version=$(get_major_version $version)
+           if [ ${major_version} -eq ${curr_major_version} ]
+               then
+#                      echo $version
+                       install_mxnet $version
+                       run_models
+           fi
+       fi
+done
+exit 0
diff --git 
a/tests/nightly/model_backwards_compatibility_check/upload_models_to_s3.sh 
b/tests/nightly/model_backwards_compatibility_check/upload_models_to_s3.sh
new file mode 100755
index 00000000000..16923980aca
--- /dev/null
+++ b/tests/nightly/model_backwards_compatibility_check/upload_models_to_s3.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#Author: Piyush Ghai
+
+set -ex
+
+echo "uploading model files to s3"
+
+echo `pwd`
+cd ./tests/nightly/model_backwards_compatibility_check/models/
+echo `pwd`
+
+# The directory structure will be as follows :
+# <mxnet-version>/<model-files> eg :
+# ls /tests/nightly/model_backwards_compatibility_check/models/
+# 1.1.0/   1.2.0/   1.2.1/
+# We upload these folders to S3; the inference files understand this layout and pull the models off them.
+for dir in $(ls `pwd`/)
+do
+    echo $dir
+    aws s3 cp $dir/ s3://mxnet-ci-prod-backwards-compatibility-models/$dir/ 
--recursive
+done
+
+echo "Deleting model files"
+cd ../
+rm -rf `pwd`/models


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

[GitHub] marcoabreu closed pull request #11626: [MXNET-651] MXNet Model Backwards Compatibility Checker

Reply via email to