zhengruifeng commented on pull request #29255:
URL: https://github.com/apache/spark/pull/29255#issuecomment-664216405


   Test code:
   ```
   // Timing script for LogisticRegressionModel prediction methods.
   // NOTE: `val start` / `val end` are declared several times below; that only works
   // when the statements are entered one at a time at a REPL prompt, where later
   // definitions shadow earlier ones. `spark` and `col` are not defined or imported
   // here - presumably they are provided by the spark-shell session (confirm).
   import org.apache.spark.ml.linalg._
   import org.apache.spark.ml.classification._
   import org.apache.spark.storage.StorageLevel
   
   
   // Load the libsvm-format dataset and rewrite the label column as (label + 1) / 2
   // (presumably mapping -1/+1 labels to 0/1 - confirm against the dataset).
   val df = 
spark.read.format("libsvm").load("/data1/Datasets/a9a/a9a").withColumn("label", 
(col("label")+1)/2)
   // Mark the DataFrame for caching, then run a count so a job executes before timing
   // starts (presumably to populate the cache - confirm).
   df.persist(StorageLevel.MEMORY_AND_DISK)
   df.count
   
   // Fit a logistic regression model with at most 10 iterations.
   val lr = new LogisticRegression().setMaxIter(10)
   val model = lr.fit(df)
   
   
   // Collect every feature vector into a local array so the per-vector methods below
   // are called directly in a loop rather than through a DataFrame transform.
   val vecs = df.select("features").rdd.map(row => row.getAs[Vector](0)).collect
   
   
   // ---- Round 1: single threshold set via setThreshold(0.2) ----
   model.setThreshold(0.2)
   
   // Each timing below evaluates to the elapsed wall-clock time in milliseconds
   // (end - start, both taken from System.currentTimeMillis).
   // 1000 passes over all collected vectors calling predict.
   val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis; 
end - start
   
   // 1000 passes over all collected vectors calling predictRaw.
   val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictRaw(vec)}}; val end = 
System.currentTimeMillis; end - start
   
   // 1000 passes over all collected vectors calling predictProbability.
   val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictProbability(vec)}}; val end = 
System.currentTimeMillis; end - start
   
   // 100 runs of transform over the whole DataFrame, each forced by count.
   val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i => 
model.transform(df).count}; val end = System.currentTimeMillis; end - start
   
   
   
   // ---- Round 2: thresholds array set via setThresholds(Array(1, 10)) ----
   // The same four timings are repeated with this setting.
   model.setThresholds(Array(1, 10))
   
   // 1000 passes over all collected vectors calling predict.
   val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis; 
end - start
   
   // 1000 passes over all collected vectors calling predictRaw.
   val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictRaw(vec)}}; val end = 
System.currentTimeMillis; end - start
   
   // 1000 passes over all collected vectors calling predictProbability.
   val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictProbability(vec)}}; val end = 
System.currentTimeMillis; end - start
   
   // 100 runs of transform over the whole DataFrame, each forced by count.
   val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i => 
model.transform(df).count}; val end = System.currentTimeMillis; end - start
   ```
   
   
   Results (each `resN: Long` value is the elapsed time in milliseconds):
   This PR:
   ```
   scala> model.setThreshold(0.2)
   res12: model.type = LogisticRegressionModel: uid=logreg_4516abb8aba0, 
numClasses=2, numFeatures=123
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis; 
end - start
   start: Long = 1595839791616
   end: Long = 1595839795511
   res13: Long = 3895
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictRaw(vec)}}; val end = 
System.currentTimeMillis; end - start
   start: Long = 1595839795647
   end: Long = 1595839801387
   res14: Long = 5740
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictProbability(vec)}}; val end = 
System.currentTimeMillis; end - start
   start: Long = 1595839801574
   end: Long = 1595839809076
   res15: Long = 7502
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i => 
model.transform(df).count}; val end = System.currentTimeMillis; end - start
   start: Long = 1595839809178
   end: Long = 1595839812969
   res16: Long = 3791
   
   scala> 
   
   scala> 
   
   scala> 
   
   scala> model.setThresholds(Array(1, 10))
   res17: model.type = LogisticRegressionModel: uid=logreg_4516abb8aba0, 
numClasses=2, numFeatures=123
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis; 
end - start
   start: Long = 1595839813184
   end: Long = 1595839816877
   res18: Long = 3693
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictRaw(vec)}}; val end = 
System.currentTimeMillis; end - start
   start: Long = 1595839816990
   end: Long = 1595839822876
   res19: Long = 5886
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictProbability(vec)}}; val end = 
System.currentTimeMillis; end - start
   start: Long = 1595839822976
   end: Long = 1595839830499
   res20: Long = 7523
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i => 
model.transform(df).count}; val end = System.currentTimeMillis; end - start
   start: Long = 1595839893312
   end: Long = 1595839896999
   res21: Long = 3687
   ```
   
   Master:
   ```
   scala> model.setThreshold(0.2)
   res28: model.type = LogisticRegressionModel: uid=logreg_ae02b202563b, 
numClasses=2, numFeatures=123
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis; 
end - start
   start: Long = 1595839547686
   end: Long = 1595839575825
   res29: Long = 28139
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictRaw(vec)}}; val end = 
System.currentTimeMillis; end - start
   start: Long = 1595839575927
   end: Long = 1595839581805
   res30: Long = 5878
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictProbability(vec)}}; val end = 
System.currentTimeMillis; end - start
   start: Long = 1595839581924
   end: Long = 1595839591045
   res31: Long = 9121
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i => 
model.transform(df).count}; val end = System.currentTimeMillis; end - start
   start: Long = 1595839591146
   end: Long = 1595839595195
   res32: Long = 4049
   
   scala> 
   
   scala> 
   
   scala> 
   
   scala> model.setThresholds(Array(1, 10))
   res33: model.type = LogisticRegressionModel: uid=logreg_ae02b202563b, 
numClasses=2, numFeatures=123
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predict(vec)}}; val end = System.currentTimeMillis; 
end - start
   start: Long = 1595839595387
   end: Long = 1595839616439
   res34: Long = 21052
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictRaw(vec)}}; val end = 
System.currentTimeMillis; end - start
   start: Long = 1595839616540
   end: Long = 1595839622368
   res35: Long = 5828
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 1000).foreach{i => 
vecs.foreach{vec => model.predictProbability(vec)}}; val end = 
System.currentTimeMillis; end - start
   start: Long = 1595839622455
   end: Long = 1595839631541
   res36: Long = 9086
   
   scala> 
   
   scala> val start = System.currentTimeMillis; Seq.range(0, 100).foreach{i => 
model.transform(df).count}; val end = System.currentTimeMillis; end - start
   start: Long = 1595839631632
   end: Long = 1595839635489
   res37: Long = 3857
   ```
   
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to