roywei commented on a change in pull request #14629: [MXNET-1333] Estimator and Fit API URL: https://github.com/apache/incubator-mxnet/pull/14629#discussion_r280602388
########## File path: python/mxnet/gluon/contrib/estimator/estimator.py ########## @@ -0,0 +1,376 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +# pylint: disable=wildcard-import, unused-variable +"""Gluon Estimator""" + +import copy +import warnings +import weakref + +from .event_handler import MetricHandler, ValidationHandler, LoggingHandler +from .event_handler import TrainBegin, EpochBegin, BatchBegin, BatchEnd, EpochEnd, TrainEnd +from .... 
import gluon, autograd +from ....context import Context, cpu, gpu, num_gpus +from ....metric import EvalMetric, Loss, Accuracy + +__all__ = ['Estimator'] + + +class Estimator(object): + """Estimator Class for easy model training + + :py:class:`Estimator` can be used to facilitate the training & validation process + + + Parameters + ---------- + loss : gluon.loss.Loss or list of gluon.loss.Loss + Loss(objective functions) to calculate during training + metrics : EvalMetric or list of EvalMetric + Metrics for evaluating models + initializer : Initializer + initializer to initialize the network + trainer : Trainer + Trainer to apply optimizer on network parameters + context : Context or list of Context + device(s) to run the training on + """ + + def __init__(self, net, + loss, + metrics=None, + initializer=None, + trainer=None, + context=None): + + self.net = net + self.loss = self._check_loss(loss) + self.train_metrics = self._check_metrics(metrics) + + self.context = self._check_context(context) + self._initialize(initializer) + self.trainer = self._check_trainer(trainer) + + def _check_loss(self, loss): + if isinstance(loss, gluon.loss.Loss): + loss = [loss] + elif isinstance(loss, list) or all([isinstance(l, gluon.loss.Loss) for l in loss]): + loss = loss + else: + raise ValueError("loss must be a Loss or a list of Loss, " + "refer to gluon.loss.Loss:{}".format(loss)) + return loss + + def _check_metrics(self, metrics): + if isinstance(metrics, EvalMetric): + metrics = [metrics] + else: + metrics = metrics or [] + if not all([isinstance(metric, EvalMetric) for metric in metrics]): + raise ValueError("metrics must be a Metric or a list of Metric, " + "refer to mxnet.metric.EvalMetric:{}".format(metrics)) + return metrics + + def _check_context(self, context): + # infer available context + gpus = num_gpus() + available_gpus = [gpu(i) for i in range(gpus)] + + if context: + # check context values, only accept Context or a list of Context + if isinstance(context, 
Context): + context = [context] + elif isinstance(context, list) and all([isinstance(c, Context) for c in context]): + context = context + else: + raise ValueError("context must be a Context or a list of Context, " + "for example mx.cpu() or [mx.gpu(0), mx.gpu(1)], " + "refer to mxnet.Context:{}".format(context)) + for ctx in context: + assert ctx in available_gpus or str(ctx).startswith('cpu'), \ + "%s is not available, please make sure " \ + "your context is in one of: mx.cpu(), %s" % \ + (ctx, ", ".join([str(ctx) for ctx in available_gpus])) + else: + # provide default context + if gpus > 0: + # only use 1 GPU by default + if gpus > 1: + warnings.warn("You have multiple GPUs, gpu(0) will be used by default." + "To utilize all your GPUs, specify context as a list of gpus, " + "e.g. context=[mx.gpu(0), mx.gpu(1)] ") + context = [gpu(0)] + else: + context = [cpu()] + return context + + def _initialize(self, initializer): + # initialize the network + if initializer: + if self._is_initialized(): + # if already initialized, re-init with user specified initializer + warnings.warn("Network already initialized, re-initializing with %s. " + "You don't need to pass initializer if you already " + "initialized your net." 
% type(initializer).__name__) + self.net.initialize(init=initializer, ctx=self.context, force_reinit=True) + else: + # initialize with user specified initializer + self.net.initialize(init=initializer, ctx=self.context, force_reinit=False) + else: + if not self._is_initialized(): + self.net.initialize(ctx=self.context) + + def _check_trainer(self, trainer): + # handle trainer + if not trainer: + warnings.warn("No trainer specified, default SGD optimizer " + "with learning rate 0.001 is used.") + trainer = gluon.Trainer(self.net.collect_params(), + 'sgd', {'learning_rate': 0.001}) + elif not isinstance(trainer, gluon.Trainer): + raise ValueError("Trainer must be a Gluon Trainer instance, refer to " + "gluon.Trainer:{}".format(trainer)) + return trainer + + def _is_initialized(self): + param_dict = self.net.collect_params() + for param in param_dict: + try: + param_dict[param].list_ctx() + except RuntimeError: + return False + return True + + def _get_data_and_label(self, batch, ctx): + data = batch[0] + label = batch[1] + data = gluon.utils.split_and_load(data, ctx_list=ctx, batch_axis=0) + label = gluon.utils.split_and_load(label, ctx_list=ctx, batch_axis=0) + return data, label + + def prepare_loss_and_metrics(self): + """ + Based on loss functions and training metrics in estimator + Create metric wrappers to record loss values, + Create copies of train loss/metric objects to record validation values + Returns train_metrics and val_metrics + + """ + if any(not hasattr(self, attribute) for attribute in + ['train_metrics', 'val_metrics']): + # Use default mx.metric.Accuracy() for gluon.loss.SoftmaxCrossEntropyLoss() + if not self.train_metrics and any([isinstance(l, gluon.loss.SoftmaxCrossEntropyLoss) for l in self.loss]): + self.train_metrics = [Accuracy()] + self.val_metrics = [] + for loss in self.loss: + # remove trailing numbers from loss name to avoid confusion + self.train_metrics.append(Loss(loss.name.rstrip('1234567890'))) Review comment: bydefault, 
loss.name will have a name-scope counter appended by default; I'm removing the trailing digits. ``` >>>loss=mx.gluon.loss.L2Loss() >>> loss.name 'l2loss0' ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
