jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/402114 )
Change subject: Merge remote-tracking branch 'upstream/master'
......................................................................
Merge remote-tracking branch 'upstream/master'
Updates to latest upstream master, including a bump in version from
0.7 to 0.8.
Conflicts:
jvm-packages/pom.xml
jvm-packages/xgboost4j-spark/pom.xml
jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
jvm-packages/xgboost4j/pom.xml
Change-Id: I1ae675ee924579623f2cf5d5fc4b797c84e56d0c
---
M jvm-packages/pom.xml
M jvm-packages/xgboost4j-spark/pom.xml
M jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
M jvm-packages/xgboost4j/pom.xml
4 files changed, 15 insertions(+), 101 deletions(-)
Approvals:
jenkins-bot: Verified
DCausse: Looks good to me, approved
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index 130505d..0fab33d 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -4,11 +4,7 @@
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- <version>0.7-wmf-2-SNAPSHOT</version>
-=======
- <version>0.8-SNAPSHOT</version>
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
+ <version>0.8-wmf-1-SNAPSHOT</version>
<packaging>pom</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml
index 3532a91..5f02dd7 100644
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -4,11 +4,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- <version>0.7-wmf-2-SNAPSHOT</version>
-=======
- <version>0.8-SNAPSHOT</version>
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
+ <version>0.8-wmf-1-SNAPSHOT</version>
</parent>
<artifactId>xgboost4j-spark</artifactId>
<build>
diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
index 053fbbb..2ff1ddf 100644
--- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
@@ -16,17 +16,12 @@
package ml.dmlc.xgboost4j.scala.spark
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
import java.io.ByteArrayInputStream
import java.util.concurrent.TimeUnit
-
-import scala.collection.mutable
-import scala.concurrent.duration.Duration
-=======
import java.io.File
import scala.collection.mutable
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
+import scala.concurrent.duration.Duration
import scala.util.Random
import ml.dmlc.xgboost4j.java.{IRabitTracker, Rabit, XGBoostError,
RabitTracker => PyRabitTracker}
import ml.dmlc.xgboost4j.scala.rabit.RabitTracker
@@ -38,7 +33,6 @@
import org.apache.spark.sql.Dataset
import org.apache.spark.ml.feature.{LabeledPoint => MLLabeledPoint}
import org.apache.spark.{SparkContext, SparkParallelismTracker, TaskContext}
-
/**
@@ -121,23 +115,11 @@
obj: ObjectiveTrait,
eval: EvalTrait,
useExternalMemory: Boolean,
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- missing: Float): RDD[Array[Byte]] = {
- val partitionedData = if (data.getNumPartitions != numWorkers) {
- logger.info(s"repartitioning training set to $numWorkers partitions")
- data.repartition(numWorkers)
- } else {
- data
- }
- val partitionedBaseMargin = partitionedData.map(_.baseMargin)
- val appName = partitionedData.context.appName
-=======
missing: Float,
prevBooster: Booster
- ): RDD[(Booster, Map[String, Array[Float]])] = {
+ ): RDD[(Int, Array[Byte], Map[String, Array[Float]])] = {
val partitionedBaseMargin = data.map(_.baseMargin)
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
// to workaround the empty partitions in training dataset,
// this might not be the best efficient implementation, see
// (https://github.com/dmlc/xgboost/issues/1277)
@@ -157,42 +139,28 @@
} else {
None
}
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
// Yes it's odd to access this but not do anything. We are ensuring the
lazily
// initialized resource monitor is setup before we enter training.
monitor
- rabitEnv.put("DMLC_TASK_ID", TaskContext.getPartitionId().toString)
-=======
rabitEnv.put("DMLC_TASK_ID", taskId)
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
Rabit.init(rabitEnv)
val watches = Watches(params,
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- fromDenseToSparseLabeledPoints(labeledPoints, missing),
- fromBaseMarginsToArray(baseMargins), cacheFileName)
-=======
removeMissingValues(labeledPoints, missing),
fromBaseMarginsToArray(baseMargins), cacheDirName)
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
try {
val numEarlyStoppingRounds = params.get("numEarlyStoppingRounds")
.map(_.toString.toInt).getOrElse(0)
val metrics = Array.tabulate(watches.size)(_ =>
Array.ofDim[Float](round))
val booster = SXGBoost.train(watches.train, params, round,
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- watches = watches.toMap, obj = obj, eval = eval,
- earlyStoppingRound = numEarlyStoppingRounds)
- val bytes = booster.toByteArray
- booster.dispose
- Iterator(bytes)
-=======
watches.toMap, metrics, obj, eval,
earlyStoppingRound = numEarlyStoppingRounds, prevBooster)
- Iterator(booster -> watches.toMap.keys.zip(metrics).toMap)
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
+ val bytes = booster.toByteArray
+ val version = booster.booster.getVersion
+ booster.dispose
+ Iterator((version, bytes, watches.toMap.keys.zip(metrics).toMap))
} finally {
Rabit.shutdown()
watches.delete()
@@ -379,20 +347,6 @@
val (checkpointPath, savingFeq) = CheckpointManager.extractParams(params)
val partitionedData = repartitionForTraining(trainingData, nWorkers)
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- val tracker = startTracker(nWorkers, trackerConf)
- try {
- val sc = trainingData.sparkContext
- val parallelismTracker = new SparkParallelismTracker(sc,
timeoutRequestWorkers, nWorkers)
- val overriddenParams = overrideParamsAccordingToTaskCPUs(params,
trainingData.sparkContext)
- val boosterBytes = buildDistributedBoosters(trainingData,
overriddenParams,
- tracker.getWorkerEnvs, nWorkers, round, obj, eval, useExternalMemory,
missing)
- val sparkJobThread = new Thread() {
- override def run() {
- // force the job
- boosterBytes.foreachPartition(() => _)
- }
-=======
val sc = trainingData.sparkContext
val checkpointManager = new CheckpointManager(sc, checkpointPath)
checkpointManager.cleanUpHigherVersions(round)
@@ -431,24 +385,7 @@
model
} finally {
tracker.stop()
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
}
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- sparkJobThread.setUncaughtExceptionHandler(tracker)
- sparkJobThread.start()
- val isClsTask = isClassificationTask(params)
- val trackerReturnVal = parallelismTracker.execute(tracker.waitFor(0L))
- logger.info(s"Rabit returns with exit code $trackerReturnVal")
- val model = postTrackerReturnProcessing(trackerReturnVal, boosterBytes,
overriddenParams,
- sparkJobThread, isClsTask)
- if (isClsTask){
- model.asInstanceOf[XGBoostClassificationModel].numOfClasses =
- params.getOrElse("num_class", "2").toString.toInt
- }
- model
- } finally {
- tracker.stop()
-=======
}.last
}
@@ -459,38 +396,27 @@
trainingData.repartition(nWorkers)
} else {
trainingData
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
}
}
private def postTrackerReturnProcessing(
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- trackerReturnVal: Int, distributedBoosters: RDD[Array[Byte]],
- params: Map[String, Any], sparkJobThread: Thread, isClassificationTask:
Boolean):
- XGBoostModel = {
-=======
trackerReturnVal: Int,
- distributedBoostersAndMetrics: RDD[(Booster, Map[String, Array[Float]])],
+ distributedBoostersAndMetrics: RDD[(Int, Array[Byte], Map[String,
Array[Float]])],
sparkJobThread: Thread,
isClassificationTask: Boolean
): XGBoostModel = {
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
if (trackerReturnVal == 0) {
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- val bais = new ByteArrayInputStream(distributedBoosters.first())
- val booster = SXGBoost.loadModel(bais)
- val xgboostModel = XGBoostModel(booster, isClassificationTask)
- distributedBoosters.unpersist(false)
- xgboostModel
-=======
// Copies of the final booster and the corresponding metrics
// reside in each partition of the `distributedBoostersAndMetrics`.
// Any of them can be used to create the model.
- val (booster, metrics) = distributedBoostersAndMetrics.first()
+ val (version, boosterBytes, metrics) =
distributedBoostersAndMetrics.first()
+
+ val bais = new ByteArrayInputStream(boosterBytes)
+ val booster = SXGBoost.loadModel(bais)
+ booster.booster.setVersion(version)
val xgboostModel = XGBoostModel(booster, isClassificationTask)
distributedBoostersAndMetrics.unpersist(false)
xgboostModel.setSummary(XGBoostTrainingSummary(metrics))
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
} else {
try {
if (sparkJobThread.isAlive) {
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index f030fdb..24dbf7f 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -4,11 +4,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
-<<<<<<< HEAD (9bdbdc Add unique tag to log instances in RabitTracker)
- <version>0.7-wmf-2-SNAPSHOT</version>
-=======
- <version>0.8-SNAPSHOT</version>
->>>>>>> BRANCH (14c639 [jvm-packages] add dev script to update version and
update v)
+ <version>0.8-wmf-1-SNAPSHOT</version>
</parent>
<artifactId>xgboost4j</artifactId>
<packaging>jar</packaging>
--
To view, visit https://gerrit.wikimedia.org/r/402114
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1ae675ee924579623f2cf5d5fc4b797c84e56d0c
Gerrit-PatchSet: 2
Gerrit-Project: search/xgboost
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits