[tvm] branch main updated: [AutoScheduler][AutoTVM] Enable xgboost >= 1.7.x new changes (#14036)

tqchen Sat, 18 Feb 2023 16:21:15 -0800

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git



The following commit(s) were added to refs/heads/main by this push:
     new 7249904622 [AutoScheduler][AutoTVM] Enable xgboost >= 1.7.x new 
changes (#14036)
7249904622 is described below

commit 7249904622e45d37d3d74a11d88929191a3bc622
Author: Balint Cristian <[email protected]>
AuthorDate: Sun Feb 19 02:19:51 2023 +0200

    [AutoScheduler][AutoTVM] Enable xgboost >= 1.7.x new changes (#14036)
    
    Enable xgboost >= 1.7.x new changes
---
 docs/install/from_source.rst                      |   4 +-
 python/gen_requirements.py                        |   2 +-
 python/tvm/auto_scheduler/cost_model/xgb_model.py | 202 ++++++++++++----------
 python/tvm/autotvm/tuner/xgboost_cost_model.py    | 201 ++++++++++++---------
 4 files changed, 238 insertions(+), 171 deletions(-)

diff --git a/docs/install/from_source.rst b/docs/install/from_source.rst
index 37ca72d80f..c2e4c7acd9 100644
--- a/docs/install/from_source.rst
+++ b/docs/install/from_source.rst
@@ -347,7 +347,7 @@ like ``virtualenv``.
 
    .. code:: bash
 
-       pip3 install --user tornado psutil 'xgboost<1.6.0' cloudpickle
+       pip3 install --user tornado psutil 'xgboost>=1.1.0' cloudpickle
 
 Note on M1 macs, you may have trouble installing xgboost / scipy. scipy and 
xgboost requires some additional dependencies to be installed,
 including openblas and its dependencies. Use the following commands to install 
scipy and xgboost with the required dependencies and
@@ -363,7 +363,7 @@ configuration. A workaround for this is to do the following 
commands:
 
         pip install scipy --no-use-pep517
 
-        pip install 'xgboost<1.6.0'
+        pip install 'xgboost>=1.1.0'
 
 Install Contrib Libraries
 -------------------------
diff --git a/python/gen_requirements.py b/python/gen_requirements.py
index 7f5fe57adb..09fb57ee94 100644
--- a/python/gen_requirements.py
+++ b/python/gen_requirements.py
@@ -276,7 +276,7 @@ CONSTRAINTS = [
     ("torch", None),
     ("torchvision", None),
     ("tornado", None),
-    ("xgboost", ">=1.1.0,<1.6.0"),  # From PR #4953 & Issue #12009
+    ("xgboost", ">=1.1.0"),  # From PR #4953 & Issue #12009
 ]
 
 
################################################################################
diff --git a/python/tvm/auto_scheduler/cost_model/xgb_model.py 
b/python/tvm/auto_scheduler/cost_model/xgb_model.py
index a4e39b9061..328e25db7b 100644
--- a/python/tvm/auto_scheduler/cost_model/xgb_model.py
+++ b/python/tvm/auto_scheduler/cost_model/xgb_model.py
@@ -19,6 +19,7 @@
 """Cost model based on xgboost"""
 import multiprocessing
 import logging
+from typing import Dict
 from collections import defaultdict
 
 import numpy as np
@@ -28,6 +29,14 @@ from .cost_model import PythonBasedModel
 from ..feature import get_per_store_features_from_measure_pairs, 
get_per_store_features_from_states
 from ..measure_record import RecordReader
 
+try:
+    from xgboost.callback import TrainingCallback  # type: ignore
+except ImportError:
+
+    class TrainingCallback:  # type: ignore
+        pass
+
+
 xgb = None
 
 logger = logging.getLogger("auto_scheduler")
@@ -198,7 +207,7 @@ class XGBModel(PythonBasedModel):
             num_boost_round=10000,
             obj=pack_sum_square_error,
             callbacks=[
-                custom_callback(
+                CustomCallback(
                     stopping_rounds=50,
                     metric="tr-p-rmse",
                     fevals=[
@@ -539,125 +548,144 @@ def pack_sum_average_peak_score(N):
     return feval
 
 
-def custom_callback(
-    stopping_rounds,
-    metric,
-    fevals,
-    evals=(),
-    log_file=None,
-    maximize=False,
-    verbose_eval=True,
-    skip_every=2,
-):
-    """Callback function for xgboost to support multiple custom evaluation 
functions"""
-    # pylint: disable=import-outside-toplevel
-    from xgboost.core import EarlyStopException
-    from xgboost.callback import _fmt_metric
-
-    try:
-        from xgboost.training import aggcv
-    except ImportError:
-        from xgboost.callback import _aggcv as aggcv
-
-    state = {}
-    metric_shortname = metric.split("-")[1]
-
-    def init(env):
-        """internal function"""
-        bst = env.model
-
-        state["maximize_score"] = maximize
-        state["best_iteration"] = 0
-        if maximize:
-            state["best_score"] = float("-inf")
-        else:
-            state["best_score"] = float("inf")
+class XGBoostCallback(TrainingCallback):
+    """Base class for XGBoost callbacks."""
 
-        if bst is not None:
-            if bst.attr("best_score") is not None:
-                state["best_score"] = float(bst.attr("best_score"))
-                state["best_iteration"] = int(bst.attr("best_iteration"))
-                state["best_msg"] = bst.attr("best_msg")
-            else:
-                bst.set_attr(best_iteration=str(state["best_iteration"]))
-                bst.set_attr(best_score=str(state["best_score"]))
-        else:
-            assert env.cvfolds is not None
+    def __call__(self, env: "xgb.core.CallbackEnv"):
+        # Compatibility with xgboost < 1.3
+        return self.after_iteration(env.model, env.iteration, 
env.evaluation_result_list)
 
-    def callback(env):
-        """internal function"""
-        if not state:
-            init(env)
+    def after_iteration(self, model: "xgb.Booster", epoch: int, evals_log: 
Dict):
+        raise NotImplementedError
+
+
+class CustomCallback(XGBoostCallback):
+    """
+    Callback function for xgboost.
+    Support custom evaluation function and early-stopping.
+    """
+
+    def __init__(
+        self,
+        stopping_rounds,
+        metric,
+        fevals,
+        evals=(),
+        log_file=None,
+        maximize=False,
+        verbose_eval=True,
+        skip_every=2,
+    ):
+        """Init function"""
+        self.stopping_rounds = stopping_rounds
+        self.metric = metric
+        self.metric_shortname = metric.split("-")[1]
+        self.fevals = fevals
+        self.evals = evals
+        self.log_file = log_file
+        self.maximize = maximize
+        self.verbose_eval = verbose_eval
+        self.skip_every = skip_every
+        self.state = {}
 
-        bst = env.model
-        i = env.iteration
-        cvfolds = env.cvfolds
+    def after_iteration(self, model: "xgb.Booster", epoch: int, evals_log: 
Dict):
+        """Run after each iteration.  Return True when training should stop."""
+        # pylint:disable = import-outside-toplevel
+        try:
+            from xgboost.callback import _fmt_metric  # type: ignore
+        except ImportError:
+            # Compatibility with xgboost >= 1.6
+            def _fmt_metric(value, show_stdv=True):
+                """format metric string"""
+                if len(value) == 2:
+                    return f"{value[0]}:{value[1]:.5f}"
+                if len(value) == 3:
+                    if show_stdv:
+                        return f"{value[0]}:{value[1]:.5f}+{value[2]:.5f}"
+                    return f"{value[0]}:{value[1]:.5f}"
+                raise ValueError("wrong metric value", value)
+
+        ##### init state #####
+        if not self.state:
+            self.state["maximize_score"] = self.maximize
+            self.state["best_iteration"] = 0
+            if self.maximize:
+                self.state["best_score"] = float("-inf")
+            else:
+                self.state["best_score"] = float("inf")
 
+            assert model is not None
+            if model.attr("best_score") is not None:
+                self.state["best_score"] = float(model.attr("best_score"))
+                self.state["best_iteration"] = 
int(model.attr("best_iteration"))
+                self.state["best_msg"] = model.attr("best_msg")
+            else:
+                
model.set_attr(best_iteration=str(self.state["best_iteration"]))
+                model.set_attr(best_score=str(self.state["best_score"]))
         res_dict = {}
 
-        if i % skip_every == 1:
-            return
+        if epoch % self.skip_every == 1:
+            return False
 
         ##### evaluation #####
-        if cvfolds is not None:
-            for feval in fevals:
-                tmp = aggcv([f.eval(i, feval) for f in cvfolds])
-                for k, mean, std in tmp:
-                    res_dict[k] = [mean, std]
-        else:
-            for feval in fevals:
-                bst_eval = bst.eval_set(evals, i, feval)
-                res = [x.split(":") for x in bst_eval.split()]
-                for kv in res[1:]:
-                    res_dict[kv[0]] = [float(kv[1])]
+        for feval in self.fevals:
+            bst_eval = model.eval_set(self.evals, epoch, feval)
+            res = [x.split(":") for x in bst_eval.split()]
+            for kv in res[1:]:
+                res_dict[kv[0]] = [float(kv[1])]
 
         eval_res = []
         keys = list(res_dict.keys())
-        keys.sort(key=lambda x: x if metric_shortname not in x else "a" + x)
+        keys.sort(key=lambda x: x if self.metric_shortname not in x else "a" + 
x)
         for key in keys:
             v = res_dict[key]
             eval_res.append([key] + v)
 
         ##### print eval result #####
-        if not isinstance(verbose_eval, bool) and verbose_eval and i % 
verbose_eval == 0:
-            infos = ["XGB iter: %3d" % i]
+        if (
+            not isinstance(self.verbose_eval, bool)
+            and self.verbose_eval
+            and epoch % self.verbose_eval == 0
+        ):
+            infos = ["XGB iter: %3d" % epoch]
             for item in eval_res:
                 if "null" in item[0]:
                     continue
                 infos.append("%s: %.6f" % (item[0], item[1]))
 
             logger.debug("\t".join(infos))
-            if log_file:
-                with open(log_file, "a") as fout:
+            if self.log_file:
+                with open(self.log_file, "a") as fout:
                     fout.write("\t".join(infos) + "\n")
 
         ##### choose score and do early stopping #####
         score = None
         for item in eval_res:
-            if item[0] == metric:
+            if item[0] == self.metric:
                 score = item[1]
                 break
         assert score is not None
 
-        best_score = state["best_score"]
-        best_iteration = state["best_iteration"]
-        maximize_score = state["maximize_score"]
+        best_score = self.state["best_score"]
+        best_iteration = self.state["best_iteration"]
+        maximize_score = self.state["maximize_score"]
+
         if (maximize_score and score > best_score) or (not maximize_score and 
score < best_score):
-            msg = "[%d] %s" % (env.iteration, "\t".join([_fmt_metric(x) for x 
in eval_res]))
-            state["best_msg"] = msg
-            state["best_score"] = score
-            state["best_iteration"] = env.iteration
+            msg = "[%d] %s" % (epoch, "\t".join([_fmt_metric(x) for x in 
eval_res]))
+            self.state["best_msg"] = msg
+            self.state["best_score"] = score
+            self.state["best_iteration"] = epoch
             # save the property to attributes, so they will occur in 
checkpoint.
-            if env.model is not None:
-                env.model.set_attr(
-                    best_score=str(state["best_score"]),
-                    best_iteration=str(state["best_iteration"]),
-                    best_msg=state["best_msg"],
+            if model is not None:
+                model.set_attr(
+                    best_score=str(self.state["best_score"]),
+                    best_iteration=str(self.state["best_iteration"]),
+                    best_msg=self.state["best_msg"],
                 )
-        elif env.iteration - best_iteration >= stopping_rounds:
-            best_msg = state["best_msg"]
-            if verbose_eval and env.rank == 0:
+        elif epoch - best_iteration >= self.stopping_rounds:
+            best_msg = self.state["best_msg"]
+            if self.verbose_eval:
                 logger.debug("XGB stopped. Best iteration: %s ", best_msg)
-            raise EarlyStopException(best_iteration)
+            return True
 
-    return callback
+        return False
diff --git a/python/tvm/autotvm/tuner/xgboost_cost_model.py 
b/python/tvm/autotvm/tuner/xgboost_cost_model.py
index 6fa04f336f..a80c350903 100644
--- a/python/tvm/autotvm/tuner/xgboost_cost_model.py
+++ b/python/tvm/autotvm/tuner/xgboost_cost_model.py
@@ -20,6 +20,8 @@
 import logging
 import time
 
+from typing import Dict
+
 import numpy as np
 from tvm.contrib.popen_pool import PopenPoolExecutor, StatusKind
 
@@ -28,6 +30,14 @@ from ..utils import get_rank
 from .metric import cover_curve, max_curve, recall_curve
 from .model_based_tuner import CostModel, FeatureCache
 
+try:
+    from xgboost.callback import TrainingCallback  # type: ignore
+except ImportError:
+
+    class TrainingCallback:  # type: ignore
+        pass
+
+
 xgb = None
 
 logger = logging.getLogger("autotvm")
@@ -198,7 +208,7 @@ class XGBoostCostModel(CostModel):
             dtrain,
             num_boost_round=8000,
             callbacks=[
-                custom_callback(
+                CustomCallback(
                     stopping_rounds=20,
                     metric="tr-a-recall@%d" % plan_size,
                     evals=[(dtrain, "tr")],
@@ -282,7 +292,7 @@ class XGBoostCostModel(CostModel):
             dtrain,
             num_boost_round=400,
             callbacks=[
-                custom_callback(
+                CustomCallback(
                     stopping_rounds=100,
                     metric="tr-a-recall@%d" % plan_size,
                     evals=[(dtrain, "tr")],
@@ -443,118 +453,147 @@ def _extract_curve_feature_log(arg):
     return x, y
 
 
-def custom_callback(
-    stopping_rounds, metric, fevals, evals=(), log_file=None, maximize=False, 
verbose_eval=True
-):
-    """callback function for xgboost to support multiple custom evaluation 
functions"""
-    # pylint: disable=import-outside-toplevel
-    from xgboost.callback import _fmt_metric
-    from xgboost.core import EarlyStopException
+class XGBoostCallback(TrainingCallback):
+    """Base class for XGBoost callbacks."""
 
-    try:
-        from xgboost.training import aggcv
-    except ImportError:
-        from xgboost.callback import _aggcv as aggcv
+    def __call__(self, env: "xgb.core.CallbackEnv"):
+        # Compatibility with xgboost < 1.3
+        return self.after_iteration(env.model, env.iteration, 
env.evaluation_result_list)
 
-    state = {}
-    metric_shortname = metric.split("-")[1]
+    def after_iteration(self, model: "xgb.Booster", epoch: int, evals_log: 
Dict):
+        raise NotImplementedError
 
-    def init(env):
-        """internal function"""
-        bst = env.model
 
-        state["maximize_score"] = maximize
-        state["best_iteration"] = 0
-        if maximize:
-            state["best_score"] = float("-inf")
-        else:
-            state["best_score"] = float("inf")
+class CustomCallback(XGBoostCallback):
+    """
+    Callback function for xgboost.
+    Support custom evaluation function and early-stopping.
+    """
 
-        if bst is not None:
-            if bst.attr("best_score") is not None:
-                state["best_score"] = float(bst.attr("best_score"))
-                state["best_iteration"] = int(bst.attr("best_iteration"))
-                state["best_msg"] = bst.attr("best_msg")
+    def __init__(
+        self,
+        stopping_rounds,
+        metric,
+        fevals,
+        evals=(),
+        log_file=None,
+        maximize=False,
+        verbose_eval=True,
+        skip_every=2,
+    ):
+        """Init function"""
+        self.stopping_rounds = stopping_rounds
+        self.metric = metric
+        self.metric_shortname = metric.split("-")[1]
+        self.fevals = fevals
+        self.evals = evals
+        self.log_file = log_file
+        self.maximize = maximize
+        self.verbose_eval = verbose_eval
+        self.skip_every = skip_every
+        self.state = {}
+
+    def after_iteration(self, model: "xgb.Booster", epoch: int, evals_log: 
Dict):
+        """Run after each iteration.  Return True when training should stop."""
+        # pylint:disable = import-outside-toplevel
+        try:
+            from xgboost.callback import _fmt_metric  # type: ignore
+        except ImportError:
+            # Compatibility with xgboost >= 1.6
+            def _fmt_metric(value, show_stdv=True):
+                """format metric string"""
+                if len(value) == 2:
+                    return f"{value[0]}:{value[1]:.5f}"
+                if len(value) == 3:
+                    if show_stdv:
+                        return f"{value[0]}:{value[1]:.5f}+{value[2]:.5f}"
+                    return f"{value[0]}:{value[1]:.5f}"
+                raise ValueError("wrong metric value", value)
+
+        ##### init state #####
+        if not self.state:
+            self.state["maximize_score"] = self.maximize
+            self.state["best_iteration"] = 0
+            if self.maximize:
+                self.state["best_score"] = float("-inf")
             else:
-                bst.set_attr(best_iteration=str(state["best_iteration"]))
-                bst.set_attr(best_score=str(state["best_score"]))
-        else:
-            assert env.cvfolds is not None
-
-    def callback(env):
-        """internal function"""
-        if not state:
-            init(env)
-
-        bst = env.model
-        i = env.iteration
-        cvfolds = env.cvfolds
+                self.state["best_score"] = float("inf")
 
+            assert model is not None
+            if model.attr("best_score") is not None:
+                self.state["best_score"] = float(model.attr("best_score"))
+                self.state["best_iteration"] = 
int(model.attr("best_iteration"))
+                self.state["best_msg"] = model.attr("best_msg")
+            else:
+                
model.set_attr(best_iteration=str(self.state["best_iteration"]))
+                model.set_attr(best_score=str(self.state["best_score"]))
         res_dict = {}
 
+        if epoch % self.skip_every == 1:
+            return False
+
         ##### evaluation #####
-        if cvfolds is not None:
-            for feval in fevals:
-                tmp = aggcv([f.eval(i, feval) for f in cvfolds])
-                for k, mean, std in tmp:
-                    res_dict[k] = [mean, std]
-        else:
-            for feval in fevals:
-                bst_eval = bst.eval_set(evals, i, feval)
-                res = [x.split(":") for x in bst_eval.split()]
-                for kv in res[1:]:
-                    res_dict[kv[0]] = [float(kv[1])]
+        for feval in self.fevals:
+            bst_eval = model.eval_set(self.evals, epoch, feval)
+            res = [x.split(":") for x in bst_eval.split()]
+            for kv in res[1:]:
+                res_dict[kv[0]] = [float(kv[1])]
 
         eval_res = []
         keys = list(res_dict.keys())
-        keys.sort(key=lambda x: x if metric_shortname not in x else "a" + x)
+        keys.sort(key=lambda x: x if self.metric_shortname not in x else "a" + 
x)
         for key in keys:
             v = res_dict[key]
             eval_res.append([key] + v)
 
         ##### print eval result #####
-        infos = ["XGB iter: %3d" % i]
-        for item in eval_res:
-            if "null" in item[0]:
-                continue
-            infos.append("%s: %.6f" % (item[0], item[1]))
+        if (
+            not isinstance(self.verbose_eval, bool)
+            and self.verbose_eval
+            and epoch % self.verbose_eval == 0
+        ):
+            infos = ["XGB iter: %3d" % epoch]
+            for item in eval_res:
+                if "null" in item[0]:
+                    continue
+                infos.append("%s: %.6f" % (item[0], item[1]))
 
-        if not isinstance(verbose_eval, bool) and verbose_eval and i % 
verbose_eval == 0:
             logger.debug("\t".join(infos))
-        if log_file:
-            with open(log_file, "a") as fout:
-                fout.write("\t".join(infos) + "\n")
+            if self.log_file:
+                with open(self.log_file, "a") as fout:
+                    fout.write("\t".join(infos) + "\n")
 
         ##### choose score and do early stopping #####
         score = None
         for item in eval_res:
-            if item[0] == metric:
+            if item[0] == self.metric:
                 score = item[1]
                 break
         assert score is not None
 
-        best_score = state["best_score"]
-        best_iteration = state["best_iteration"]
-        maximize_score = state["maximize_score"]
+        best_score = self.state["best_score"]
+        best_iteration = self.state["best_iteration"]
+        maximize_score = self.state["maximize_score"]
+
         if (maximize_score and score > best_score) or (not maximize_score and 
score < best_score):
-            msg = "[%d] %s" % (env.iteration, "\t".join([_fmt_metric(x) for x 
in eval_res]))
-            state["best_msg"] = msg
-            state["best_score"] = score
-            state["best_iteration"] = env.iteration
+            msg = "[%d] %s" % (epoch, "\t".join([_fmt_metric(x) for x in 
eval_res]))
+            self.state["best_msg"] = msg
+            self.state["best_score"] = score
+            self.state["best_iteration"] = epoch
             # save the property to attributes, so they will occur in 
checkpoint.
-            if env.model is not None:
-                env.model.set_attr(
-                    best_score=str(state["best_score"]),
-                    best_iteration=str(state["best_iteration"]),
-                    best_msg=state["best_msg"],
+            if model is not None:
+                model.set_attr(
+                    best_score=str(self.state["best_score"]),
+                    best_iteration=str(self.state["best_iteration"]),
+                    best_msg=self.state["best_msg"],
                 )
-        elif env.iteration - best_iteration >= stopping_rounds:
-            best_msg = state["best_msg"]
-            if verbose_eval and env.rank == 0:
+        elif epoch - best_iteration >= self.stopping_rounds:
+            best_msg = self.state["best_msg"]
+            if self.verbose_eval:
                 logger.debug("XGB stopped. Best iteration: %s ", best_msg)
-            raise EarlyStopException(best_iteration)
+            return True
 
-    return callback
+        return False
 
 
 # feval wrapper for xgboost

[tvm] branch main updated: [AutoScheduler][AutoTVM] Enable xgboost >= 1.7.x new changes (#14036)

Reply via email to