zhengruifeng commented on pull request #29255:
URL: https://github.com/apache/spark/pull/29255#issuecomment-664216405
test code:
```
import org.apache.spark.ml.linalg._
import org.apache.spark.ml.classification._
import org.apache.spark.storage.StorageLevel
// Benchmark script: fits a LogisticRegression model once, then times the
// single-vector prediction methods and the DataFrame transform under two
// threshold configurations.
// NOTE(review): `start` and `end` are re-declared for every measurement. That
// works when the statements are entered one at a time at a scala> prompt, but
// presumably would not compile as one compilation unit — confirm before
// reusing this outside the shell.
// NOTE(review): `spark` and `col` are not imported here; presumably they are
// provided by the shell session — confirm.

// Load the a9a data in libsvm format and replace "label" with (label + 1) / 2.
// Presumably this maps -1/+1 labels onto 0/1 — verify against the dataset.
val df =
spark.read.format("libsvm").load("/data1/Datasets/a9a/a9a").withColumn("label",
(col("label")+1)/2)
// Persist with MEMORY_AND_DISK, then run count; presumably the count is there
// to force the persisted data to be computed before any timing starts.
df.persist(StorageLevel.MEMORY_AND_DISK)
df.count
// Fit the model with maxIter set to 10.
val lr = new LogisticRegression().setMaxIter(10)
val model = lr.fit(df)
// Collect the "features" column into `vecs` so the loops below can call the
// model's per-vector methods directly.
val vecs = df.select("features").rdd.map(row => row.getAs[Vector](0)).collect
// Configuration 1: setThreshold(0.2).
model.setThreshold(0.2)
// Time 1000 passes of model.predict over every vector in `vecs`; the final
// expression is the elapsed wall-clock time in milliseconds.
val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis;
end - start
// Same measurement for model.predictRaw.
val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictRaw(vec)}}; val end =
System.currentTimeMillis; end - start
// Same measurement for model.predictProbability.
val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictProbability(vec)}}; val end =
System.currentTimeMillis; end - start
// Time 100 runs of model.transform(df).count.
val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i =>
model.transform(df).count}; val end = System.currentTimeMillis; end - start
// Configuration 2: setThresholds(Array(1, 10)); the same four measurements
// are repeated below.
model.setThresholds(Array(1, 10))
// Time 1000 passes of model.predict over `vecs`.
val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis;
end - start
// Time 1000 passes of model.predictRaw over `vecs`.
val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictRaw(vec)}}; val end =
System.currentTimeMillis; end - start
// Time 1000 passes of model.predictProbability over `vecs`.
val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictProbability(vec)}}; val end =
System.currentTimeMillis; end - start
// Time 100 runs of model.transform(df).count.
val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i =>
model.transform(df).count}; val end = System.currentTimeMillis; end - start
```
Results:
This PR:
```
scala> model.setThreshold(0.2)
res12: model.type = LogisticRegressionModel: uid=logreg_4516abb8aba0,
numClasses=2, numFeatures=123
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis;
end - start
start: Long = 1595839791616
end: Long = 1595839795511
res13: Long = 3895
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictRaw(vec)}}; val end =
System.currentTimeMillis; end - start
start: Long = 1595839795647
end: Long = 1595839801387
res14: Long = 5740
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictProbability(vec)}}; val end =
System.currentTimeMillis; end - start
start: Long = 1595839801574
end: Long = 1595839809076
res15: Long = 7502
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i =>
model.transform(df).count}; val end = System.currentTimeMillis; end - start
start: Long = 1595839809178
end: Long = 1595839812969
res16: Long = 3791
scala>
scala>
scala>
scala> model.setThresholds(Array(1, 10))
res17: model.type = LogisticRegressionModel: uid=logreg_4516abb8aba0,
numClasses=2, numFeatures=123
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis;
end - start
start: Long = 1595839813184
end: Long = 1595839816877
res18: Long = 3693
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictRaw(vec)}}; val end =
System.currentTimeMillis; end - start
start: Long = 1595839816990
end: Long = 1595839822876
res19: Long = 5886
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictProbability(vec)}}; val end =
System.currentTimeMillis; end - start
start: Long = 1595839822976
end: Long = 1595839830499
res20: Long = 7523
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i =>
model.transform(df).count}; val end = System.currentTimeMillis; end - start
start: Long = 1595839893312
end: Long = 1595839896999
res21: Long = 3687
```
Master:
```
scala> model.setThreshold(0.2)
res28: model.type = LogisticRegressionModel: uid=logreg_ae02b202563b,
numClasses=2, numFeatures=123
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis;
end - start
start: Long = 1595839547686
end: Long = 1595839575825
res29: Long = 28139
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictRaw(vec)}}; val end =
System.currentTimeMillis; end - start
start: Long = 1595839575927
end: Long = 1595839581805
res30: Long = 5878
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictProbability(vec)}}; val end =
System.currentTimeMillis; end - start
start: Long = 1595839581924
end: Long = 1595839591045
res31: Long = 9121
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i =>
model.transform(df).count}; val end = System.currentTimeMillis; end - start
start: Long = 1595839591146
end: Long = 1595839595195
res32: Long = 4049
scala>
scala>
scala>
scala> model.setThresholds(Array(1, 10))
res33: model.type = LogisticRegressionModel: uid=logreg_ae02b202563b,
numClasses=2, numFeatures=123
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis;
end - start
start: Long = 1595839595387
end: Long = 1595839616439
res34: Long = 21052
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictRaw(vec)}}; val end =
System.currentTimeMillis; end - start
start: Long = 1595839616540
end: Long = 1595839622368
res35: Long = 5828
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i =>
vecs.foreach{vec => model.predictProbability(vec)}}; val end =
System.currentTimeMillis; end - start
start: Long = 1595839622455
end: Long = 1595839631541
res36: Long = 9086
scala>
scala> val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i =>
model.transform(df).count}; val end = System.currentTimeMillis; end - start
start: Long = 1595839631632
end: Long = 1595839635489
res37: Long = 3857
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org