This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch fit-api
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/fit-api by this push:
new 0748b47 [MXNET-1396][Fit-API] Update default handler logic (#14765)
0748b47 is described below
commit 0748b470ddb5fdfe235571bf8d4ad902ab757071
Author: Lai Wei <[email protected]>
AuthorDate: Tue Apr 23 20:33:03 2019 -0700
[MXNET-1396][Fit-API] Update default handler logic (#14765)
* move to nightly for binaries
* update default handler
* fix pylint
* trigger ci
* trigger ci
---
ci/docker/runtime_functions.sh | 8 +-
python/mxnet/gluon/contrib/estimator/estimator.py | 69 +++++++---
.../mxnet/gluon/contrib/estimator/event_handler.py | 2 +-
tests/nightly/Jenkinsfile | 16 ---
tests/nightly/JenkinsfileForBinaries | 8 ++
tests/python/unittest/test_gluon_estimator.py | 149 ++++++++++++++-------
tests/python/unittest/test_gluon_event_handler.py | 11 +-
7 files changed, 168 insertions(+), 95 deletions(-)
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index b194ebb..64da366 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -1296,18 +1296,12 @@ nightly_scala_demo_test_cpu() {
bash bin/run_im.sh
}
-nightly_estimator_gpu() {
+nightly_estimator() {
set -ex
cd /work/mxnet/tests/nightly/estimator
export PYTHONPATH=/work/mxnet/python/
python test_estimator_cnn.py --type gpu
python test_sentiment_rnn.py --type gpu
-}
-
-nightly_estimator_cpu() {
- set -ex
- cd /work/mxnet/tests/nightly/estimator
- export PYTHONPATH=/work/mxnet/python/
python test_estimator_cnn.py --type cpu
python test_sentiment_rnn.py --type cpu
}
diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py
b/python/mxnet/gluon/contrib/estimator/estimator.py
index 78672d2..d30595a 100644
--- a/python/mxnet/gluon/contrib/estimator/estimator.py
+++ b/python/mxnet/gluon/contrib/estimator/estimator.py
@@ -157,6 +157,8 @@ class Estimator(object):
Based on loss functions and training metrics in estimator
Create metric wrappers to record loss values,
Create copies of train loss/metric objects to record validation values
+ Returns train_metrics and val_metrics
+
"""
if any(not hasattr(self, attribute) for attribute in
['train_metrics', 'val_metrics']):
@@ -165,8 +167,7 @@ class Estimator(object):
self.train_metrics = [Accuracy()]
self.val_metrics = []
for loss in self.loss:
- self.train_metrics.append(Loss("Train " + ''.join([i for i in
loss.name if not i.isdigit()])))
- self.val_metrics.append(Loss("Validation " + ''.join([i for i
in loss.name if not i.isdigit()])))
+ self.train_metrics.append(Loss(''.join([i for i in loss.name
if not i.isdigit()])))
for metric in self.train_metrics:
val_metric = copy.deepcopy(metric)
metric.name = "Train " + metric.name
@@ -231,21 +232,9 @@ class Estimator(object):
from a data batch and load into contexts(devices)
"""
self.max_epochs = epochs
- event_handlers = event_handlers or []
- # provide default logging handler
- if not event_handlers:
- train_metrics, val_metrics = self.prepare_loss_and_metrics()
- event_handlers.append(MetricHandler(train_metrics=train_metrics))
- if val_data:
- event_handlers.append(ValidationHandler(val_data=val_data,
eval_fn=self.evaluate,
-
val_metrics=val_metrics))
- event_handlers.append(LoggingHandler(train_metrics=train_metrics,
- val_metrics=val_metrics))
- warnings.warn("No Event Handler specified, default %s are used. "
- "Please look at
gluon.contrib.estimator.event_handler for more detail." %
- ", ".join([handler.__class__.__name__ for handler in
event_handlers]))
- event_handlers.sort(key=lambda handler: getattr(handler, 'rank', 0),
reverse=True)
+ # provide default handlers
+ event_handlers = self._prepare_default_handlers(val_data,
event_handlers)
train_begin, epoch_begin, batch_begin, \
batch_end, epoch_end, train_end =
self._categorize_handlers(event_handlers)
@@ -297,6 +286,54 @@ class Estimator(object):
for handler in train_end:
handler.train_end(estimator_ref)
+ def _prepare_default_handlers(self, val_data, event_handlers):
+ event_handlers = event_handlers or []
+ default_handlers = []
+ train_metrics, val_metrics = self.prepare_loss_and_metrics()
+
+ if not any(isinstance(handler, MetricHandler) for handler in
event_handlers):
+ event_handlers.append(MetricHandler(train_metrics=train_metrics))
+ default_handlers.append("MetricHandler")
+
+ if val_data and not any(isinstance(handler, ValidationHandler) for
handler in event_handlers):
+ event_handlers.append(ValidationHandler(val_data=val_data,
eval_fn=self.evaluate,
+ val_metrics=val_metrics))
+ default_handlers.append("ValidationHandler")
+
+ if not any(isinstance(handler, LoggingHandler) for handler in
event_handlers):
+ event_handlers.append(LoggingHandler(train_metrics=train_metrics,
+ val_metrics=val_metrics))
+ default_handlers.append("LoggingHandler")
+
+ # if there is a mix of user defined event handlers and default event
handlers
+ # they should have the same set of loss and metrics
+ if default_handlers:
+ msg = "You are training with the following default event handlers:
%s. " \
+ "They use loss and metrics from
estimator.prepare_loss_and_metrics(). " \
+ "Please use the same set of metrics for all your other
handlers." % \
+ ", ".join(default_handlers)
+ warnings.warn(msg)
+ references = []
+ for handler in event_handlers:
+ for attribute in dir(handler):
+ if any(keyword in attribute for keyword in ['metric',
'monitor']):
+ reference = getattr(handler, attribute)
+ if isinstance(reference, list):
+ references += reference
+ else:
+ references.append(reference)
+ for metric in references:
+ if metric and metric not in train_metrics + val_metrics:
+ msg = "We have added the following default handlers for you:
%s and used " \
+ "estimator.prepare_loss_and_metrics() to pass
metrics to " \
+ "those handlers. Please use the same set of metrics
" \
+ "for all your handlers." % \
+ ", ".join(default_handlers)
+ raise ValueError(msg)
+
+ event_handlers.sort(key=lambda handler: getattr(handler, 'priority',
0))
+ return event_handlers
+
def _categorize_handlers(self, event_handlers):
"""
categorize handlers into 6 event lists to avoid calling empty methods
diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py
b/python/mxnet/gluon/contrib/estimator/event_handler.py
index 220aa31..d8c3c6e 100644
--- a/python/mxnet/gluon/contrib/estimator/event_handler.py
+++ b/python/mxnet/gluon/contrib/estimator/event_handler.py
@@ -299,7 +299,7 @@ class CheckpointHandler(BatchEnd, EpochEnd):
self.save_best_only = save_best_only
if self.save_best_only and not isinstance(self.monitor, EvalMetric):
raise ValueError("To save best model only, please provide one of
the metric objects as monitor, "
- "You can create these objects using
estimator.prepare_loss_and_metric()")
+ "You can get these objects using
estimator.prepare_loss_and_metric()")
self.epoch_period = epoch_period
self.batch_period = batch_period
self.num_batches = 0
diff --git a/tests/nightly/Jenkinsfile b/tests/nightly/Jenkinsfile
index 1be084c..758c864 100755
--- a/tests/nightly/Jenkinsfile
+++ b/tests/nightly/Jenkinsfile
@@ -136,22 +136,6 @@ core_logic: {
utils.docker_run('ubuntu_nightly_cpu', 'nightly_test_javascript',
false)
}
}
- },
- 'Gluon estimator: GPU': {
- node(NODE_LINUX_GPU) {
- ws('workspace/estimator-test-gpu') {
- utils.unpack_and_init('gpu', mx_lib)
- utils.docker_run('ubuntu_nightly_gpu', 'nightly_estimator_gpu', true)
- }
- }
- },
- 'Gluon estimator: CPU': {
- node(NODE_LINUX_CPU) {
- ws('workspace/estimator-test-cpu') {
- utils.unpack_and_init('cpu', mx_lib)
- utils.docker_run('ubuntu_nightly_cpu', 'nightly_estimator_cpu',
false)
- }
- }
}
}
}
diff --git a/tests/nightly/JenkinsfileForBinaries
b/tests/nightly/JenkinsfileForBinaries
index 53e1c30..7e77278 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -106,6 +106,14 @@ core_logic: {
utils.docker_run('ubuntu_nightly_gpu',
'nightly_tutorial_test_ubuntu_python3_gpu', true, '1500m')
}
}
+ },
+ 'Gluon estimator: GPU': {
+ node(NODE_LINUX_GPU) {
+ ws('workspace/estimator-test-gpu') {
+ utils.unpack_and_init('gpu', mx_lib)
+ utils.docker_run('ubuntu_nightly_gpu', 'nightly_estimator', true)
+ }
+ }
}
}
}
diff --git a/tests/python/unittest/test_gluon_estimator.py
b/tests/python/unittest/test_gluon_estimator.py
index 6f19f43..6432142 100644
--- a/tests/python/unittest/test_gluon_estimator.py
+++ b/tests/python/unittest/test_gluon_estimator.py
@@ -27,17 +27,28 @@ from mxnet.gluon.contrib.estimator import *
from nose.tools import assert_raises
-def get_model():
+def _get_test_network():
net = nn.Sequential()
net.add(nn.Dense(4, activation='relu', flatten=False))
return net
+def _get_test_data():
+ batch_size = 4
+ in_data = mx.nd.random.uniform(shape=(10, 3))
+ out_data = mx.nd.random.uniform(shape=(10, 4))
+ # Input dataloader
+ dataset = gluon.data.dataset.ArrayDataset(in_data, out_data)
+ dataloader = gluon.data.DataLoader(dataset, batch_size=batch_size)
+ dataiter = mx.io.NDArrayIter(data=in_data, label=out_data,
batch_size=batch_size)
+ return dataloader, dataiter
+
+
def test_fit():
''' test estimator with different train data types '''
- net = get_model()
+ net = _get_test_network()
+ dataloader, dataiter = _get_test_data()
num_epochs = 1
- batch_size = 4
ctx = mx.cpu()
loss = gluon.loss.L2Loss()
acc = mx.metric.Accuracy()
@@ -48,31 +59,25 @@ def test_fit():
metrics=acc,
trainer=trainer,
context=ctx)
- in_data = mx.nd.random.uniform(shape=(10, 3))
- out_data = mx.nd.random.uniform(shape=(10, 4))
- # Input dataloader
- dataset = gluon.data.dataset.ArrayDataset(in_data, out_data)
- train_dataloader = gluon.data.DataLoader(dataset, batch_size=batch_size)
- est.fit(train_data=train_dataloader,
+
+ est.fit(train_data=dataloader,
epochs=num_epochs)
- # Input dataiter
- train_dataiter = mx.io.NDArrayIter(data=in_data, label=out_data,
batch_size=batch_size)
with assert_raises(ValueError):
- est.fit(train_data=train_dataiter,
+ est.fit(train_data=dataiter,
epochs=num_epochs)
# Input NDArray
with assert_raises(ValueError):
- est.fit(train_data=[in_data, out_data],
+ est.fit(train_data=[mx.nd.ones(shape=(10, 3))],
epochs=num_epochs)
def test_validation():
''' test different validation data types'''
- net = get_model()
+ net = _get_test_network()
+ dataloader, dataiter = _get_test_data()
num_epochs = 1
- batch_size = 4
ctx = mx.cpu()
loss = gluon.loss.L2Loss()
acc = mx.metric.Accuracy()
@@ -83,41 +88,35 @@ def test_validation():
metrics=acc,
trainer=trainer,
context=ctx)
- in_data = mx.nd.random.uniform(shape=(10, 3))
- out_data = mx.nd.random.uniform(shape=(10, 4))
# Input dataloader
- dataset = gluon.data.dataset.ArrayDataset(in_data, out_data)
- train_dataloader = gluon.data.DataLoader(dataset, batch_size=batch_size)
- val_dataloader = gluon.data.DataLoader(dataset, batch_size=batch_size)
- est.fit(train_data=train_dataloader,
- val_data=val_dataloader,
+ est.fit(train_data=dataloader,
+ val_data=dataloader,
epochs=num_epochs)
- # Input dataiter
- train_dataiter = mx.io.NDArrayIter(data=in_data, label=out_data,
batch_size=batch_size)
- val_dataiter = mx.io.NDArrayIter(data=in_data, label=out_data,
batch_size=batch_size)
+ # using validation handler
+ train_metrics, val_metrics = est.prepare_loss_and_metrics()
+ validation_handler = ValidationHandler(val_data=dataloader,
eval_fn=est.evaluate,
+ val_metrics=val_metrics)
+
with assert_raises(ValueError):
- est.fit(train_data=train_dataiter,
- val_data=val_dataiter,
+ est.fit(train_data=dataiter,
+ val_data=dataiter,
epochs=num_epochs)
# Input NDArray
with assert_raises(ValueError):
- est.fit(train_data=[in_data, out_data],
- val_data=[in_data, out_data],
+ est.fit(train_data=[mx.nd.ones(shape=(10, 3))],
+ val_data=[mx.nd.ones(shape=(10, 3))],
epochs=num_epochs)
@unittest.skipIf(sys.version_info.major < 3, 'Test on python 3')
def test_initializer():
''' test with no initializer, inconsistent initializer '''
- net = get_model()
+ net = _get_test_network()
+ train_data, _ = _get_test_data()
num_epochs = 1
- batch_size = 4
ctx = mx.cpu()
- in_data = mx.nd.random.uniform(shape=(10, 3))
- out_data = mx.nd.random.uniform(shape=(10, 4))
- dataset = gluon.data.dataset.ArrayDataset(in_data, out_data)
- train_data = gluon.data.DataLoader(dataset, batch_size=batch_size)
+
loss = gluon.loss.L2Loss()
acc = mx.metric.Accuracy()
# no initializer
@@ -129,7 +128,7 @@ def test_initializer():
epochs=num_epochs)
# different initializer for net and estimator
- net = get_model()
+ net = _get_test_network()
net.initialize(mx.init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':
0.001})
# catch reinit warning
@@ -148,14 +147,11 @@ def test_initializer():
@unittest.skipIf(sys.version_info.major < 3, 'Test on python 3')
def test_trainer():
''' test with no trainer and invalid trainer '''
- net = get_model()
+ net = _get_test_network()
+ train_data, _ = _get_test_data()
num_epochs = 1
- batch_size = 4
ctx = mx.cpu()
- in_data = mx.nd.random.uniform(shape=(10, 3))
- out_data = mx.nd.random.uniform(shape=(10, 4))
- dataset = gluon.data.dataset.ArrayDataset(in_data, out_data)
- train_data = gluon.data.DataLoader(dataset, batch_size=batch_size)
+
loss = gluon.loss.L2Loss()
acc = mx.metric.Accuracy()
net.initialize(ctx=ctx)
@@ -181,14 +177,11 @@ def test_trainer():
def test_metric():
''' test with no metric, list of metrics, invalid metric '''
- net = get_model()
+ net = _get_test_network()
+ train_data, _ = _get_test_data()
num_epochs = 1
- batch_size = 4
ctx = mx.cpu()
- in_data = mx.nd.random.uniform(shape=(10, 3))
- out_data = mx.nd.random.uniform(shape=(10, 4))
- dataset = gluon.data.dataset.ArrayDataset(in_data, out_data)
- train_data = gluon.data.DataLoader(dataset, batch_size=batch_size)
+
loss = gluon.loss.L2Loss()
net.initialize(ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':
0.001})
@@ -227,7 +220,7 @@ def test_metric():
def test_loss():
''' test with invalid loss '''
- net = get_model()
+ net = _get_test_network()
ctx = mx.cpu()
acc = mx.metric.Accuracy()
net.initialize(ctx=ctx)
@@ -243,7 +236,7 @@ def test_loss():
def test_context():
''' test with no context, list of context, invalid context '''
- net = get_model()
+ net = _get_test_network()
loss = gluon.loss.L2Loss()
metrics = mx.metric.Accuracy()
# input no context
@@ -253,7 +246,7 @@ def test_context():
# input list of context
gpus = mx.context.num_gpus()
ctx = [mx.gpu(i) for i in range(gpus)] if gpus > 0 else [mx.cpu()]
- net = get_model()
+ net = _get_test_network()
est = Estimator(net=net,
loss=loss,
metrics=metrics,
@@ -309,3 +302,57 @@ def test_categorize_handlers():
assert len(batch_begin) == 2
assert len(batch_end) == 1
assert len(train_end) == 2
+
+
[email protected](sys.version_info.major < 3, 'Test on python 3')
+def test_default_handlers():
+ net = _get_test_network()
+ train_data, _ = _get_test_data()
+
+ num_epochs = 1
+ ctx = mx.cpu()
+
+ net.initialize(ctx=ctx)
+ trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':
0.001})
+
+ train_acc = mx.metric.RMSE()
+ loss = gluon.loss.L2Loss()
+
+ est = Estimator(net=net,
+ loss=loss,
+ metrics=train_acc,
+ trainer=trainer,
+ context=ctx)
+ # no handler
+ with warnings.catch_warnings(record=True) as w:
+ est.fit(train_data=train_data, epochs=num_epochs)
+ assert 'You are training with the' in str(w[-1].message)
+
+ # handler with prepared loss and metrics
+ train_metrics, val_metrics = est.prepare_loss_and_metrics()
+ logging = LoggingHandler(train_metrics=train_metrics,
val_metrics=val_metrics)
+ with warnings.catch_warnings(record=True) as w:
+ est.fit(train_data=train_data, epochs=num_epochs,
event_handlers=[logging])
+ assert 'You are training with the' in str(w[-1].message)
+ # provide metric handler by default
+ assert 'MetricHandler' in str(w[-1].message)
+
+ # handler with all user defined metrics
+ val_metrics = [mx.metric.RMSE("val acc")]
+ metric = MetricHandler(train_metrics=[train_acc])
+ logging = LoggingHandler(train_metrics=train_metrics,
val_metrics=val_metrics)
+ est.fit(train_data=train_data, epochs=num_epochs, event_handlers=[metric,
logging])
+
+ # handler with mixed metrics, some handler use metrics prepared by
estimator
+ # some handler use metrics user prepared
+ val_metrics = [mx.metric.RMSE("val acc")]
+ logging = LoggingHandler(train_metrics=train_metrics,
val_metrics=val_metrics)
+ with assert_raises(ValueError):
+ est.fit(train_data=train_data, epochs=num_epochs,
event_handlers=[logging])
+
+ # test handler order
+ early_stopping = EarlyStoppingHandler(monitor=val_metrics[0])
+ handlers = est._prepare_default_handlers(val_data=None,
event_handlers=[early_stopping])
+ assert len(handlers) == 3
+ assert isinstance(handlers[0], MetricHandler)
+ assert isinstance(handlers[2], LoggingHandler)
diff --git a/tests/python/unittest/test_gluon_event_handler.py
b/tests/python/unittest/test_gluon_event_handler.py
index e151281..cdb4264 100644
--- a/tests/python/unittest/test_gluon_event_handler.py
+++ b/tests/python/unittest/test_gluon_event_handler.py
@@ -19,10 +19,11 @@ import os
import tempfile
import mxnet as mx
+from common import TemporaryDirectory
from mxnet import nd
from mxnet.gluon import nn, loss
from mxnet.gluon.contrib.estimator import estimator, event_handler
-from common import TemporaryDirectory
+
def _get_test_network():
net = nn.Sequential()
@@ -92,10 +93,12 @@ def test_logging():
net = _get_test_network()
ce_loss = loss.SoftmaxCrossEntropyLoss()
- ce_loss_metric = mx.metric.Loss(ce_loss.name)
acc = mx.metric.Accuracy()
est = estimator.Estimator(net, loss=ce_loss, metrics=acc)
+ train_metrics, val_metrics = est.prepare_loss_and_metrics()
logging_handler = [event_handler.LoggingHandler(file_name=file_name,
- file_location=tmpdir,
train_metrics=[acc, ce_loss_metric])]
+ file_location=tmpdir,
+
train_metrics=train_metrics,
+
val_metrics=val_metrics)]
est.fit(test_data, event_handlers=logging_handler, epochs=1)
- assert os.path.isfile(output_dir)
\ No newline at end of file
+ assert os.path.isfile(output_dir)