[ 
https://issues.apache.org/jira/browse/SPARK-32060?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

zhengruifeng updated SPARK-32060:
---------------------------------
    Description: 
|performance test in https://issues.apache.org/jira/browse/SPARK-31783,
 Huber loss seems to start diverging after 70 iterations.

  {code:scala}
 for (size <- Seq(1, 4, 16, 64); iter <- Seq(10, 50, 100)) {
    Thread.sleep(10000)
    val hlir = new 
LinearRegression().setLoss("huber").setSolver("l-bfgs").setMaxIter(iter).setTol(0)
    val start = System.currentTimeMillis
    val model = hlir.setBlockSize(size).fit(df)
    val end = System.currentTimeMillis
    println((model.uid, size, iter, end - start, 
model.summary.objectiveHistory.last, model.summary.totalIterations, 
model.coefficients.toString.take(100)))
}{code}


  was:
|performance test in https://issues.apache.org/jira/browse/SPARK-31783,
 Huber loss seems to start diverging after 70 iterations.
  {code:java}
 for (size <- Seq(1, 4, 16, 64); iter <- Seq(10, 50, 100)) {
    Thread.sleep(10000)
    val hlir = new 
LinearRegression().setLoss("huber").setSolver("l-bfgs").setMaxIter(iter).setTol(0)
    val start = System.currentTimeMillis
    val model = hlir.setBlockSize(size).fit(df)
    val end = System.currentTimeMillis
    println((model.uid, size, iter, end - start, 
model.summary.objectiveHistory.last, model.summary.totalIterations, 
model.coefficients.toString.take(100)))
}{code}|
| |
| |
| |
| |
| |
| |
| |
| |
|result:|
|blockSize=1|
|(linReg_887d29a0b42b,1,10,34222,12.600287516874573,11,[-1.128806276706593,8.677674008637235,9.388511222747894,8.55780534824698,34.241366265505654,26.96490)|
|(linReg_fa87d52d3e2f,1,50,134017,1.7265674039265724,51,[-1.2409375311919224,-0.36565818648554393,1.0271741000977583,-0.5264376930209739,-1.544463380879014,)|
|(linReg_b2a07f6fa653,1,100,259137,0.7519335552972538,101,[-0.3821288691282684,0.22040814987367136,0.07747613675383101,0.16130205219214436,1.2347926613828966,)|

|blockSize=4|
|(linReg_779f6890aee9,4,10,7241,12.600287516879131,11,[-1.128806276706101,8.677674008649985,9.38851122275203,8.557805348259139,34.241366265511715,26.96490)|
|(linReg_0e6d961e054f,4,50,11691,1.726567383577527,51,[-1.2409376473684588,-0.3656580427637058,1.0271741488856692,-0.5264377459728347,-1.5444635623477996,)|
|(linReg_1e12fafab7d2,4,100,17966,0.796858465032771,101,[-0.014663920062692357,-0.057216366204118345,0.1764582527782608,0.12141286532514688,1.58266258533765)|

|blockSize=16|
|(linReg_5ad195c843bb,16,10,7338,12.600287516896273,11,[-1.1288062767576779,8.677674008672964,9.388511222753797,8.557805348281347,34.24136626552257,26.9649)|
|(linReg_686fe7849c42,16,50,12093,1.7265673762478049,51,[-1.2409376965631724,-0.3656579898205299,1.0271741857198382,-0.5264377659307408,-1.5444636325154564,)|
|(linReg_cc934209aac1,16,100,18253,0.7844992170383625,101,[-0.4230952901291041,0.08770018558785676,0.2719402480140563,0.08602481376955884,0.8763149744964053,-)|

|blockSize=64|
|(linReg_2de48672cf40,64,10,7956,12.600287516883563,11,[-1.1288062767198885,8.677674008655007,9.388511222751507,8.557805348264019,34.24136626551386,26.9649)|
|(linReg_a4ed072bdf00,64,50,14423,1.7265674032944005,51,[-1.240937585330031,-0.36565823041213286,1.02717419529322,-0.5264376482700692,-1.5444634018412484,0.)|
|(linReg_ed9bf8e6db3d,64,100,22680,0.7508904951409897,101,[-0.39923222418441695,0.2591603128603928,0.025707538173424214,0.06178131424518882,1.3651702157456522)|


> Huber loss Convergence
> ----------------------
>
>                 Key: SPARK-32060
>                 URL: https://issues.apache.org/jira/browse/SPARK-32060
>             Project: Spark
>          Issue Type: Sub-task
>          Components: ML
>    Affects Versions: 3.1.0
>            Reporter: zhengruifeng
>            Priority: Minor
>         Attachments: huber.xlsx, image-2020-06-28-18-05-28-867.png
>
>
> |performance test in https://issues.apache.org/jira/browse/SPARK-31783,
>  Huber loss seems to start diverging after 70 iterations.
>   {code:scala}
>  for (size <- Seq(1, 4, 16, 64); iter <- Seq(10, 50, 100)) {
>     Thread.sleep(10000)
>     val hlir = new 
> LinearRegression().setLoss("huber").setSolver("l-bfgs").setMaxIter(iter).setTol(0)
>     val start = System.currentTimeMillis
>     val model = hlir.setBlockSize(size).fit(df)
>     val end = System.currentTimeMillis
>     println((model.uid, size, iter, end - start, 
> model.summary.objectiveHistory.last, model.summary.totalIterations, 
> model.coefficients.toString.take(100)))
> }{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to