[
https://issues.apache.org/jira/browse/MAHOUT-1190?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13630312#comment-13630312
]
Robin Anil commented on MAHOUT-1190:
------------------------------------
{noformat}
BenchMarks DenseVector RandSparseVector
SeqSparseVector Clusters Dense.fn(Rand)
Dense.fn(Seq) Rand.fn(Dense) Rand.fn(Seq)
Seq.fn(Dense) Seq.fn(Rand) Closest center without EClosest
center with Elka
Clone
nCalls = 20000; nCalls = 20000; nCalls
= 20000;
sum = 0.094099s; sum = 0.318823s; sum =
0.072381s;
min = 0.0010ms; min = 0.0010ms; min =
0.0010ms;
max = 2.558ms; max = 4.139ms; max =
2.145ms;
mean = 0.004704ms; mean = 0.015941ms; mean =
0.003619ms;
stdDev = 0.058739ms; stdDev = 0.123114ms; stdDev
= 0.050722ms;
Speed = 212542.11 /sec Speed = 62730.73 /sec Speed =
276315.6 /sec
Rate = 2550.5054 MB/s Rate = 752.7688 MB/s Rate =
3315.7874 MB/s
Create (copy)
nCalls = 20000; nCalls = 20000; nCalls
= 20000;
sum = 0.345591s; sum = 1.548758s; sum =
0.91669s;
min = 0.011ms; min = 0.041ms; min =
0.03ms;
max = 6.345ms; max = 4.417ms; max =
4.338ms;
mean = 0.017279ms; mean = 0.077437ms; mean =
0.045834ms;
stdDev = 0.061681ms; stdDev = 0.211875ms; stdDev
= 0.054088ms;
Speed = 57871.875 /sec Speed = 12913.573 /sec Speed =
21817.627 /sec
Rate = 694.4625 MB/s Rate = 154.96289 MB/s Rate =
261.81152 MB/s
Create (incrementally)
nCalls = 20000; nCalls = 20000;
nCalls = 5000;
sum = 0.11603s; sum = 2.327242s;
sum = 0.441183s;
min = 0.0030ms; min = 0.07ms;
min = 0.067ms;
max = 0.28ms; max = 8.522ms;
max = 2.236ms;
mean = 0.005801ms; mean = 0.116362ms;
mean = 0.088236ms;
stdDev = 0.004521ms; stdDev = 0.268825ms;
stdDev = 0.144592ms;
Speed = 172369.22 /sec Speed = 8593.863 /sec
Speed = 45332.66 /sec
Rate = 2068.4307 MB/s Rate = 103.126366 MB/s
Rate = 543.99194 MB/s
Deserialize
nCalls = 20000; nCalls = 20000; nCalls
= 20000;
sum = 0.935276s; sum = 1.953629s; sum =
1.448476s;
min = 0.031ms; min = 0.069ms; min =
0.047ms;
max = 32.432ms; max = 21.848ms; max =
19.022ms;
mean = 0.046763ms; mean = 0.097681ms; mean =
0.072423ms;
stdDev = 0.23671ms; stdDev = 0.195235ms; stdDev
= 0.143115ms;
Speed = 21384.06 /sec Speed = 10237.357 /sec Speed =
13807.615 /sec
Rate = 256.60873 MB/s Rate = 122.8483 MB/s Rate =
165.69139 MB/s
DotProduct
nCalls = 20000; nCalls = 20000; nCalls
= 20000; nCalls = 20000; nCalls =
20000; nCalls = 20000; nCalls = 20000; nCalls = 20000;
nCalls = 20000;
sum = 0.025022s; sum = 0.822768s; sum =
1.034785s; sum = 0.782512s; sum =
1.221851s; sum = 0.889112s; sum = 1.676991s; sum =
0.642186s; sum = 0.778225s;
min = 0.0010ms; min = 0.025ms; min =
0.042ms; min = 0.034ms; min =
0.049ms; min = 0.029ms; min = 0.075ms; min =
0.028ms; min = 0.02ms;
max = 0.097ms; max = 5.611ms; max =
1.665ms; max = 1.868ms; max =
0.221ms; max = 1.831ms; max = 2.026ms; max =
0.385ms; max = 1.614ms;
mean = 0.001251ms; mean = 0.041138ms; mean =
0.051739ms; mean = 0.039125ms; mean =
0.061092ms; mean = 0.044455ms; mean = 0.083849ms; mean =
0.032109ms; mean = 0.038911ms;
stdDev = 0.003134ms; stdDev = 0.074289ms; stdDev
= 0.026677ms; stdDev = 0.023449ms; stdDev =
0.008498ms; stdDev = 0.029101ms; stdDev = 0.025225ms; stdDev =
0.009308ms; stdDev = 0.016324ms;
Speed = 799296.6 /sec Speed = 24308.19 /sec Speed =
19327.686 /sec Speed = 25558.713 /sec Speed =
16368.607 /sec Speed = 22494.354 /sec Speed = 11926.122 /sec Speed =
31143.625 /sec Speed = 25699.508 /sec
Rate = 9591.56 MB/s Rate = 291.69827 MB/s Rate =
231.93224 MB/s Rate = 306.70456 MB/s Rate =
196.4233 MB/s Rate = 269.93225 MB/s Rate = 143.11348 MB/s Rate =
373.7235 MB/s Rate = 308.3941 MB/s
Serialize
nCalls = 20000; nCalls = 20000; nCalls
= 20000;
sum = 1.044763s; sum = 1.841978s; sum =
1.302853s;
min = 0.03ms; min = 0.067ms; min =
0.04ms;
max = 4.773ms; max = 4.727ms; max =
4.417ms;
mean = 0.052238ms; mean = 0.092098ms; mean =
0.065142ms;
stdDev = 0.106575ms; stdDev = 0.130668ms; stdDev
= 0.120166ms;
Speed = 19143.098 /sec Speed = 10857.893 /sec Speed =
15350.926 /sec
Rate = 229.71718 MB/s Rate = 130.29472 MB/s Rate =
184.21112 MB/s
org.apache.mahout.common.distance.CosineDistanceMeasure
nCalls = 20000; nCalls = 20000; nCalls
= 20000; nCalls = 20000; nCalls =
20000; nCalls = 20000; nCalls = 20000; nCalls = 20000;
nCalls = 20000; nCalls = 200; nCalls = 200;
sum = 0.217529s; sum = 10.589026s; sum =
12.866331s; sum = 9.352726s; sum =
5.674042s; sum = 8.813188s; sum = 7.713065s; sum =
14.759785s; sum = 17.554847s; sum = 28.72354s; sum =
25.441976s;
min = 0.0090ms; min = 0.446ms; min =
0.54ms; min = 0.429ms; min =
0.266ms; min = 0.406ms; min = 0.351ms; min =
0.683ms; min = 0.802ms; min = 138.819ms; min =
123.072ms;
max = 0.296ms; max = 3.396ms; max =
3.285ms; max = 2.494ms; max =
1.711ms; max = 1.425ms; max = 2.414ms; max =
2.395ms; max = 3.06ms; max = 155.755ms; max =
138.626ms;
mean = 0.010876ms; mean = 0.529451ms; mean =
0.643316ms; mean = 0.467636ms; mean =
0.283702ms; mean = 0.440659ms; mean = 0.385653ms; mean =
0.737989ms; mean = 0.877742ms; mean = 143.6177ms; mean =
127.20988ms;
stdDev = 0.004232ms; stdDev = 0.12417ms; stdDev
= 0.086663ms; stdDev = 0.091864ms; stdDev =
0.02228ms; stdDev = 0.046419ms; stdDev = 0.049665ms; stdDev =
0.059414ms; stdDev = 0.110088ms; stdDev = 2.441513ms; stdDev =
2.224582ms;
Speed = 91941.766 /sec Speed = 1888.7478 /sec Speed =
1554.4447 /sec Speed = 2138.414 /sec Speed =
3524.8242 /sec Speed = 2269.3264 /sec Speed = 2593.003 /sec Speed =
1355.0332 /sec Speed = 1139.2864 /sec Speed = 696.29297 /sec Speed =
786.1024 /sec
Rate = 1103.3013 MB/s Rate = 22.664974 MB/s Rate =
18.653337 MB/s Rate = 25.66097 MB/s Rate =
42.297894 MB/s Rate = 27.231916 MB/s Rate = 31.116035 MB/s Rate =
16.260399 MB/s Rate = 13.671438 MB/s Rate = 8.355516 MB/s Rate =
9.433229 MB/s
org.apache.mahout.common.distance.EuclideanDistanceMeasure
nCalls = 20000; nCalls = 20000; nCalls
= 20000; nCalls = 20000; nCalls =
20000; nCalls = 20000; nCalls = 20000; nCalls = 20000;
nCalls = 20000; nCalls = 200; nCalls = 200;
sum = 0.25666s; sum = 11.483341s; sum =
13.724728s; sum = 9.518714s; sum =
5.947292s; sum = 9.080387s; sum = 7.758182s; sum =
14.655626s; sum = 17.584958s; sum = 28.465162s; sum =
26.106154s;
min = 0.0090ms; min = 0.523ms; min =
0.614ms; min = 0.43ms; min =
0.265ms; min = 0.406ms; min = 0.351ms; min =
0.682ms; min = 0.8ms; min = 138.436ms; min =
126.409ms;
max = 0.067ms; max = 2.595ms; max =
2.481ms; max = 2.496ms; max = 2.09ms;
max = 2.421ms; max = 2.267ms; max = 5.701ms;
max = 3.355ms; max = 149.691ms; max = 143.473ms;
mean = 0.012833ms; mean = 0.574167ms; mean =
0.686236ms; mean = 0.475935ms; mean =
0.297364ms; mean = 0.454019ms; mean = 0.387909ms; mean =
0.732781ms; mean = 0.879247ms; mean = 142.32581ms; mean =
130.53077ms;
stdDev = 0.007987ms; stdDev = 0.103067ms; stdDev
= 0.047585ms; stdDev = 0.101134ms; stdDev =
0.048258ms; stdDev = 0.061061ms; stdDev = 0.054873ms; stdDev =
0.057892ms; stdDev = 0.115034ms; stdDev = 2.017967ms; stdDev =
2.686664ms;
Speed = 77924.1 /sec Speed = 1741.6534 /sec Speed =
1457.2238 /sec Speed = 2101.1243 /sec Speed =
3362.875 /sec Speed = 2202.5493 /sec Speed = 2577.9236 /sec Speed =
1364.6636 /sec Speed = 1137.3357 /sec Speed = 702.6132 /sec Speed =
766.10284 /sec
Rate = 935.08923 MB/s Rate = 20.899841 MB/s Rate =
17.486685 MB/s Rate = 25.213491 MB/s Rate = 40.3545
MB/s Rate = 26.430593 MB/s Rate = 30.935083 MB/s Rate = 16.375963 MB/s
Rate = 13.648029 MB/s Rate = 8.431359 MB/s Rate = 9.193234 MB/s
org.apache.mahout.common.distance.ManhattanDistanceMeasure
nCalls = 20000; nCalls = 20000; nCalls
= 20000; nCalls = 20000; nCalls =
20000; nCalls = 20000; nCalls = 20000; nCalls = 20000;
nCalls = 20000; nCalls = 200; nCalls = 200;
sum = 3.170042s; sum = 32.556211s; sum =
25.579182s; sum = 10.780511s; sum =
9.422344s; sum = 32.343222s; sum = 30.738856s; sum =
12.301594s; sum = 26.174744s; sum = 85.775997s; sum =
78.017392s;
min = 0.123ms; min = 1.412ms; min =
1.14ms; min = 0.481ms; min =
0.413ms; min = 1.458ms; min = 1.397ms; min = 0.54ms;
min = 1.154ms; min = 417.87ms; min = 372.156ms;
max = 4.961ms; max = 7.106ms; max =
16.967ms; max = 2.4ms; max =
3.252ms; max = 4.803ms; max = 4.546ms; max =
4.868ms; max = 5.869ms; max = 468.881ms; max =
426.024ms;
mean = 0.158502ms; mean = 1.62781ms; mean =
1.278959ms; mean = 0.539025ms; mean =
0.471117ms; mean = 1.617161ms; mean = 1.536942ms; mean =
0.615079ms; mean = 1.308737ms; mean = 428.879985ms; mean =
390.08696ms;
stdDev = 0.139484ms; stdDev = 0.426293ms; stdDev
= 0.193845ms; stdDev = 0.078308ms; stdDev =
0.08385ms; stdDev = 0.229207ms; stdDev = 0.204743ms; stdDev =
0.106561ms; stdDev = 0.169764ms; stdDev = 8.345077ms; stdDev =
7.43826ms;
Speed = 6309.0645 /sec Speed = 614.3221 /sec Speed =
781.8858 /sec Speed = 1855.1996 /sec Speed =
2122.614 /sec Speed = 618.36755 /sec Speed = 650.6423 /sec Speed =
1625.8055 /sec Speed = 764.09534 /sec Speed = 233.16545 /sec Speed =
256.3531 /sec
Rate = 75.70878 MB/s Rate = 7.3718657 MB/s Rate =
9.38263 MB/s Rate = 22.262396 MB/s Rate =
25.471369 MB/s Rate = 7.420411 MB/s Rate = 7.807708 MB/s Rate =
19.509668 MB/s Rate = 9.169145 MB/s Rate = 2.7979856 MB/s Rate =
3.0762374 MB/s
org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure
nCalls = 20000; nCalls = 20000; nCalls
= 20000; nCalls = 20000; nCalls =
20000; nCalls = 20000; nCalls = 20000; nCalls = 20000;
nCalls = 20000; nCalls = 200; nCalls = 200;
sum = 0.290806s; sum = 11.536136s; sum =
13.880762s; sum = 9.766405s; sum =
5.758582s; sum = 9.01511s; sum = 7.766585s; sum =
15.236569s; sum = 17.606654s; sum = 28.457724s; sum =
26.15304s;
min = 0.0090ms; min = 0.523ms; min =
0.615ms; min = 0.431ms; min =
0.265ms; min = 0.406ms; min = 0.351ms; min =
0.684ms; min = 0.803ms; min = 138.713ms; min =
126.679ms;
max = 0.646ms; max = 2.827ms; max =
1.961ms; max = 2.81ms; max =
1.586ms; max = 2.527ms; max = 2.326ms; max =
5.876ms; max = 3.283ms; max = 154.886ms; max =
143.716ms;
mean = 0.01454ms; mean = 0.576806ms; mean =
0.694038ms; mean = 0.48832ms; mean =
0.287929ms; mean = 0.450755ms; mean = 0.388329ms; mean =
0.761828ms; mean = 0.880332ms; mean = 142.28862ms; mean =
130.7652ms;
stdDev = 0.029554ms; stdDev = 0.105924ms; stdDev
= 0.051278ms; stdDev = 0.10958ms; stdDev =
0.027647ms; stdDev = 0.058501ms; stdDev = 0.057172ms; stdDev =
0.119059ms; stdDev = 0.118333ms; stdDev = 2.144555ms; stdDev =
2.273529ms;
Speed = 68774.37 /sec Speed = 1733.6827 /sec Speed =
1440.843 /sec Speed = 2047.8364 /sec Speed =
3473.0774 /sec Speed = 2218.4976 /sec Speed = 2575.1343 /sec Speed =
1312.6315 /sec Speed = 1135.9342 /sec Speed = 702.7969 /sec Speed =
764.72943 /sec
Rate = 825.2924 MB/s Rate = 20.804193 MB/s Rate =
17.290117 MB/s Rate = 24.574038 MB/s Rate =
41.67693 MB/s Rate = 26.621973 MB/s Rate = 30.901615 MB/s Rate =
15.751578 MB/s Rate = 13.63121 MB/s Rate = 8.433563 MB/s Rate =
9.176754 MB/s
org.apache.mahout.common.distance.TanimotoDistanceMeasure
nCalls = 20000; nCalls = 20000; nCalls
= 20000; nCalls = 20000; nCalls =
20000; nCalls = 20000; nCalls = 20000; nCalls = 20000;
nCalls = 20000; nCalls = 200; nCalls = 200;
sum = 0.204348s; sum = 11.31s; sum =
13.732429s; sum = 9.189063s; sum =
5.670745s; sum = 8.865962s; sum = 7.617151s; sum =
14.710979s; sum = 17.103456s; sum = 29.166936s; sum =
26.670028s;
min = 0.0090ms; min = 0.496ms; min =
0.589ms; min = 0.43ms; min =
0.265ms; min = 0.406ms; min = 0.351ms; min =
0.682ms; min = 0.802ms; min = 142.985ms; min =
129.507ms;
max = 0.029ms; max = 2.249ms; max =
1.555ms; max = 2.174ms; max =
5.008ms; max = 2.216ms; max = 1.027ms; max =
1.983ms; max = 2.472ms; max = 155.875ms; max =
143.117ms;
mean = 0.010217ms; mean = 0.5655ms; mean =
0.686621ms; mean = 0.459453ms; mean =
0.283537ms; mean = 0.443298ms; mean = 0.380857ms; mean =
0.735548ms; mean = 0.855172ms; mean = 145.83468ms; mean =
133.35014ms;
stdDev = 8.95E-4ms; stdDev = 0.061798ms; stdDev
= 0.044508ms; stdDev = 0.044283ms; stdDev =
0.038711ms; stdDev = 0.050872ms; stdDev = 0.033901ms; stdDev =
0.05398ms; stdDev = 0.05586ms; stdDev = 1.889893ms; stdDev =
2.16708ms;
Speed = 97872.26 /sec Speed = 1768.3466 /sec Speed =
1456.4066 /sec Speed = 2176.5005 /sec Speed =
3526.8733 /sec Speed = 2255.8184 /sec Speed = 2625.6536 /sec Speed =
1359.5288 /sec Speed = 1169.3542 /sec Speed = 685.70795 /sec Speed =
749.90546 /sec
Rate = 1174.4672 MB/s Rate = 21.22016 MB/s Rate =
17.47688 MB/s Rate = 26.118008 MB/s Rate =
42.322483 MB/s Rate = 27.069822 MB/s Rate = 31.507845 MB/s Rate =
16.314346 MB/s Rate = 14.032252 MB/s Rate = 8.228496 MB/s Rate =
8.998866 MB/s
{noformat}
> SequentialAccessSparseVector function assignment is very slow
> -------------------------------------------------------------
>
> Key: MAHOUT-1190
> URL: https://issues.apache.org/jira/browse/MAHOUT-1190
> Project: Mahout
> Issue Type: Bug
> Reporter: Dan Filimon
>
> Currently when calling .assign() on a SASV with another vector and a custom
> function, it will iterate through it and assign every single entry while also
> referring to it by index.
> This makes the process *hugely* expensive. (On a run of BallKMeans on the 20
> newsgroups data set, profiling reveals that 92% of the runtime was spent
> updating (assigning) the vectors.)
> Here's a prototype patch:
> https://github.com/dfilimon/mahout/commit/63998d82bb750150a6ae09052dadf6c326c62d3d
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira