jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/402114 )

Change subject: Merge remote-tracking branch 'upstream/master'
......................................................................


Merge remote-tracking branch 'upstream/master'

Updates to latest upstream master, including a bump in version from
0.7 to 0.8.

Conflicts:
       jvm-packages/pom.xml
       jvm-packages/xgboost4j-spark/pom.xml
       jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
       jvm-packages/xgboost4j/pom.xml

Change-Id: I1ae675ee924579623f2cf5d5fc4b797c84e56d0c
---
M jvm-packages/pom.xml
M jvm-packages/xgboost4j-spark/pom.xml
M jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
M jvm-packages/xgboost4j/pom.xml
4 files changed, 15 insertions(+), 101 deletions(-)

Approvals:
  jenkins-bot: Verified
  DCausse: Looks good to me, approved



diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index 130505d..0fab33d 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -4,11 +4,7 @@
 
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm</artifactId>
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-    <version>0.7-wmf-2-SNAPSHOT</version>
-=======
-    <version>0.8-SNAPSHOT</version>
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
+    <version>0.8-wmf-1-SNAPSHOT</version>
     <packaging>pom</packaging>
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
diff --git a/jvm-packages/xgboost4j-spark/pom.xml 
b/jvm-packages/xgboost4j-spark/pom.xml
index 3532a91..5f02dd7 100644
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -4,11 +4,7 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm</artifactId>
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-        <version>0.7-wmf-2-SNAPSHOT</version>
-=======
-        <version>0.8-SNAPSHOT</version>
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
+        <version>0.8-wmf-1-SNAPSHOT</version>
     </parent>
     <artifactId>xgboost4j-spark</artifactId>
     <build>
diff --git 
a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
 
b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
index 053fbbb..2ff1ddf 100644
--- 
a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
+++ 
b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
@@ -16,17 +16,12 @@
 
 package ml.dmlc.xgboost4j.scala.spark
 
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
 import java.io.ByteArrayInputStream
 import java.util.concurrent.TimeUnit
-
-import scala.collection.mutable
-import scala.concurrent.duration.Duration
-=======
 import java.io.File
 
 import scala.collection.mutable
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
+import scala.concurrent.duration.Duration
 import scala.util.Random
 import ml.dmlc.xgboost4j.java.{IRabitTracker, Rabit, XGBoostError, 
RabitTracker => PyRabitTracker}
 import ml.dmlc.xgboost4j.scala.rabit.RabitTracker
@@ -38,7 +33,6 @@
 import org.apache.spark.sql.Dataset
 import org.apache.spark.ml.feature.{LabeledPoint => MLLabeledPoint}
 import org.apache.spark.{SparkContext, SparkParallelismTracker, TaskContext}
-
 
 
 /**
@@ -121,23 +115,11 @@
       obj: ObjectiveTrait,
       eval: EvalTrait,
       useExternalMemory: Boolean,
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-      missing: Float): RDD[Array[Byte]] = {
-    val partitionedData = if (data.getNumPartitions != numWorkers) {
-      logger.info(s"repartitioning training set to $numWorkers partitions")
-      data.repartition(numWorkers)
-    } else {
-      data
-    }
-    val partitionedBaseMargin = partitionedData.map(_.baseMargin)
-    val appName = partitionedData.context.appName
-=======
       missing: Float,
       prevBooster: Booster
-    ): RDD[(Booster, Map[String, Array[Float]])] = {
+    ): RDD[(Int, Array[Byte], Map[String, Array[Float]])] = {
 
     val partitionedBaseMargin = data.map(_.baseMargin)
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
     // to workaround the empty partitions in training dataset,
     // this might not be the best efficient implementation, see
     // (https://github.com/dmlc/xgboost/issues/1277)
@@ -157,42 +139,28 @@
       } else {
         None
       }
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
 
       // Yes it's odd to access this but not do anything. We are ensuring the 
lazily
       // initialized resource monitor is setup before we enter training.
       monitor
 
-      rabitEnv.put("DMLC_TASK_ID", TaskContext.getPartitionId().toString)
-=======
       rabitEnv.put("DMLC_TASK_ID", taskId)
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
       Rabit.init(rabitEnv)
       val watches = Watches(params,
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-        fromDenseToSparseLabeledPoints(labeledPoints, missing),
-        fromBaseMarginsToArray(baseMargins), cacheFileName)
-=======
         removeMissingValues(labeledPoints, missing),
         fromBaseMarginsToArray(baseMargins), cacheDirName)
 
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
       try {
         val numEarlyStoppingRounds = params.get("numEarlyStoppingRounds")
             .map(_.toString.toInt).getOrElse(0)
         val metrics = Array.tabulate(watches.size)(_ => 
Array.ofDim[Float](round))
         val booster = SXGBoost.train(watches.train, params, round,
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-          watches = watches.toMap, obj = obj, eval = eval,
-          earlyStoppingRound = numEarlyStoppingRounds)
-        val bytes = booster.toByteArray
-        booster.dispose
-        Iterator(bytes)
-=======
           watches.toMap, metrics, obj, eval,
           earlyStoppingRound = numEarlyStoppingRounds, prevBooster)
-        Iterator(booster -> watches.toMap.keys.zip(metrics).toMap)
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
+        val bytes = booster.toByteArray
+        val version = booster.booster.getVersion
+        booster.dispose
+        Iterator((version, bytes, watches.toMap.keys.zip(metrics).toMap))
       } finally {
         Rabit.shutdown()
         watches.delete()
@@ -379,20 +347,6 @@
     val (checkpointPath, savingFeq) = CheckpointManager.extractParams(params)
     val partitionedData = repartitionForTraining(trainingData, nWorkers)
 
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-    val tracker = startTracker(nWorkers, trackerConf)
-    try {
-      val sc = trainingData.sparkContext
-      val parallelismTracker = new SparkParallelismTracker(sc, 
timeoutRequestWorkers, nWorkers)
-      val overriddenParams = overrideParamsAccordingToTaskCPUs(params, 
trainingData.sparkContext)
-      val boosterBytes = buildDistributedBoosters(trainingData, 
overriddenParams,
-        tracker.getWorkerEnvs, nWorkers, round, obj, eval, useExternalMemory, 
missing)
-      val sparkJobThread = new Thread() {
-        override def run() {
-          // force the job
-          boosterBytes.foreachPartition(() => _)
-        }
-=======
     val sc = trainingData.sparkContext
     val checkpointManager = new CheckpointManager(sc, checkpointPath)
     checkpointManager.cleanUpHigherVersions(round)
@@ -431,24 +385,7 @@
           model
       } finally {
         tracker.stop()
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
       }
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-      sparkJobThread.setUncaughtExceptionHandler(tracker)
-      sparkJobThread.start()
-      val isClsTask = isClassificationTask(params)
-      val trackerReturnVal = parallelismTracker.execute(tracker.waitFor(0L))
-      logger.info(s"Rabit returns with exit code $trackerReturnVal")
-      val model = postTrackerReturnProcessing(trackerReturnVal, boosterBytes, 
overriddenParams,
-        sparkJobThread, isClsTask)
-      if (isClsTask){
-        model.asInstanceOf[XGBoostClassificationModel].numOfClasses =
-          params.getOrElse("num_class", "2").toString.toInt
-      }
-      model
-    } finally {
-      tracker.stop()
-=======
     }.last
   }
 
@@ -459,38 +396,27 @@
       trainingData.repartition(nWorkers)
     } else {
       trainingData
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
     }
   }
 
   private def postTrackerReturnProcessing(
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-      trackerReturnVal: Int, distributedBoosters: RDD[Array[Byte]],
-      params: Map[String, Any], sparkJobThread: Thread, isClassificationTask: 
Boolean):
-    XGBoostModel = {
-=======
       trackerReturnVal: Int,
-      distributedBoostersAndMetrics: RDD[(Booster, Map[String, Array[Float]])],
+      distributedBoostersAndMetrics: RDD[(Int, Array[Byte], Map[String, 
Array[Float]])],
       sparkJobThread: Thread,
       isClassificationTask: Boolean
   ): XGBoostModel = {
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
     if (trackerReturnVal == 0) {
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-      val bais = new ByteArrayInputStream(distributedBoosters.first())
-      val booster = SXGBoost.loadModel(bais)
-      val xgboostModel = XGBoostModel(booster, isClassificationTask)
-      distributedBoosters.unpersist(false)
-      xgboostModel
-=======
       // Copies of the final booster and the corresponding metrics
       // reside in each partition of the `distributedBoostersAndMetrics`.
       // Any of them can be used to create the model.
-      val (booster, metrics) = distributedBoostersAndMetrics.first()
+      val (version, boosterBytes, metrics) = 
distributedBoostersAndMetrics.first()
+
+      val bais = new ByteArrayInputStream(boosterBytes)
+      val booster = SXGBoost.loadModel(bais)
+      booster.booster.setVersion(version)
       val xgboostModel = XGBoostModel(booster, isClassificationTask)
       distributedBoostersAndMetrics.unpersist(false)
       xgboostModel.setSummary(XGBoostTrainingSummary(metrics))
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
     } else {
       try {
         if (sparkJobThread.isAlive) {
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index f030fdb..24dbf7f 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -4,11 +4,7 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm</artifactId>
-<<<<<<< HEAD   (9bdbdc Add unique tag to log instances in RabitTracker)
-        <version>0.7-wmf-2-SNAPSHOT</version>
-=======
-        <version>0.8-SNAPSHOT</version>
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and 
update v)
+        <version>0.8-wmf-1-SNAPSHOT</version>
     </parent>
     <artifactId>xgboost4j</artifactId>
     <packaging>jar</packaging>

-- 
To view, visit https://gerrit.wikimedia.org/r/402114
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I1ae675ee924579623f2cf5d5fc4b797c84e56d0c
Gerrit-PatchSet: 2
Gerrit-Project: search/xgboost
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to