sandeep-krishnamurthy closed pull request #12182: [MXNET-698] Correct train-metric log to reflect epoch metric
URL: https://github.com/apache/incubator-mxnet/pull/12182
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:
diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py
index 5c762808e15..e1c1714445d 100644
--- a/python/mxnet/callback.py
+++ b/python/mxnet/callback.py
@@ -165,9 +165,13 @@ def __call__(self, param):
                     name_value = param.eval_metric.get_name_value()
                     if self.auto_reset:
                         param.eval_metric.reset()
-                    msg = 'Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec'
-                    msg += '\t%s=%f'*len(name_value)
-                    logging.info(msg, param.epoch, count, speed, *sum(name_value, ()))
+                        msg = 'Epoch[%d] Batch [%d-%d]\tSpeed: %.2f samples/sec'
+                        msg += '\t%s=%f'*len(name_value)
+                        logging.info(msg, param.epoch, count-self.frequent, count, speed, *sum(name_value, ()))
+                    else:
+                        msg = 'Epoch[%d] Batch [0-%d]\tSpeed: %.2f samples/sec'
+                        msg += '\t%s=%f'*len(name_value)
+                        logging.info(msg, param.epoch, count, speed, *sum(name_value, ()))
                 else:
                     logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec",
                                  param.epoch, count, speed)
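
For context, the user-visible effect of the callback.py change: with auto_reset=True, Speedometer now logs the batch range each metric value covers, instead of a single batch index that read as a running epoch average. A minimal sketch of the wiring follows; the toy data, network symbol, and module below are hypothetical illustrations, not part of this PR.

import logging
import mxnet as mx

logging.basicConfig(level=logging.INFO)

# Hypothetical toy problem; only the callback wiring matters here.
data = mx.nd.random.uniform(shape=(1000, 10))
label = mx.nd.array([i % 2 for i in range(1000)])
train_iter = mx.io.NDArrayIter(data, label, batch_size=50)

net = mx.sym.FullyConnected(mx.sym.Variable('data'), num_hidden=2)
net = mx.sym.SoftmaxOutput(net, name='softmax')
mod = mx.mod.Module(net)

# With this patch, auto_reset=True (the default) produces lines such as
#   Epoch[0] Batch [0-10]  Speed: ... samples/sec  accuracy=...
# making explicit that each value covers only the last `frequent` batches.
mod.fit(train_iter, num_epoch=1,
        batch_end_callback=mx.callback.Speedometer(batch_size=50, frequent=10))
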
diff --git a/python/mxnet/module/base_module.py b/python/mxnet/module/base_module.py
index 08ab8fa89e4..c534261eacc 100644
--- a/python/mxnet/module/base_module.py
+++ b/python/mxnet/module/base_module.py
@@ -22,6 +22,7 @@
 import time
 import logging
 import warnings
+import copy
 import numpy as np
 
 from .. import metric
@@ -507,6 +508,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
         validation_metric = eval_metric
         if not isinstance(eval_metric, metric.EvalMetric):
             eval_metric = metric.create(eval_metric)
+        epoch_eval_metric = copy.deepcopy(eval_metric)
 
         ################################################################################
         # training loop
@@ -514,6 +516,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
         for epoch in range(begin_epoch, num_epoch):
             tic = time.time()
             eval_metric.reset()
+            epoch_eval_metric.reset()
             nbatch = 0
             data_iter = iter(train_data)
             end_of_batch = False
@@ -529,8 +532,12 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
                     self.update_metric(eval_metric,
                                        [db.label for db in data_batch],
                                        pre_sliced=True)
+                    self.update_metric(epoch_eval_metric,
+                                       [db.label for db in data_batch],
+                                       pre_sliced=True)
                 else:
                     self.update_metric(eval_metric, data_batch.label)
+                    self.update_metric(epoch_eval_metric, data_batch.label)
 
                 try:
                     # pre fetch next batch
@@ -543,7 +550,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
                     monitor.toc_print()
 
                 if end_of_batch:
-                    eval_name_vals = eval_metric.get_name_value()
+                    eval_name_vals = epoch_eval_metric.get_name_value()
 
                 if batch_end_callback is not None:
                     batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch,
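
The base_module.py side explains why a separate metric is needed at all: when Speedometer runs with auto_reset=True, the batch-end callback resets eval_metric every `frequent` batches, so the end-of-epoch "Train-*" line previously reflected only the final few batches rather than the whole epoch. The deep-copied epoch_eval_metric accumulates across the entire epoch and is never touched by the callback. A standalone sketch of that idea, with hypothetical toy batches in place of Module internals:

import copy
import mxnet as mx

# Hypothetical toy batches: (labels, predictions) pairs.
batches = [
    ([mx.nd.array([0, 1])], [mx.nd.array([[0.9, 0.1], [0.2, 0.8]])]),
    ([mx.nd.array([1, 0])], [mx.nd.array([[0.6, 0.4], [0.3, 0.7]])]),
]

batch_metric = mx.metric.Accuracy()          # reset by Speedometer(auto_reset=True)
epoch_metric = copy.deepcopy(batch_metric)   # accumulates over the whole epoch

for labels, preds in batches:
    batch_metric.update(labels, preds)
    epoch_metric.update(labels, preds)
    batch_metric.reset()  # what an auto-resetting batch-end callback effectively does

# Before this PR the epoch log used the (just-reset) batch metric;
# now it uses the untouched epoch-level copy.
print(epoch_metric.get_name_value())  # e.g. [('accuracy', 0.5)]
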