[ 
https://issues.apache.org/jira/browse/MAHOUT-300?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12836826#action_12836826
 ] 

Jake Mannix commented on MAHOUT-300:
------------------------------------

And now my run (from three comments ago) is finally done. The results are 
below, with dot product removed since it's already been reported.

This properly demonstrates how slow it is to build up a SeqAcc vector 
incrementally, since it's not random-access, among other things.

{code}
INFO: 
BenchMarks              DenseVector             RandSparseVector        SeqSparseVector
Clone
                        nCalls = 25000;         nCalls = 25000;         nCalls = 25000;
                        sum = 222.552872s;      sum = 34.923269s;       sum = 34.251326s;
                        min = 4.598ms;          min = 0.446ms;          min = 0.4ms;
                        max = 265.445ms;        max = 184.352ms;        max = 182.734ms;
                        mean = 8.902114ms;      mean = 1.39693ms;       mean = 1.370053ms;
                        stdDev = 11.676773ms;   stdDev = 4.533406ms;    stdDev = 5.002041ms;
                        Speed = 112.33286 /sec  Speed = 715.8551 /sec   Speed = 729.89874 /sec
                        Rate = 0.6739971 MB/s   Rate = 4.2951303 MB/s   Rate = 4.379392 MB/s

Create (copy)
                        nCalls = 25000;         nCalls = 25000;         nCalls = 25000;
                        sum = 209.506424s;      sum = 1.371177s;        sum = 0.667553s;
                        min = 1.427ms;          min = 0.0050ms;         min = 0.021ms;
                        max = 11802.223ms;      max = 21.322ms;         max = 10.036ms;
                        mean = 8.380256ms;      mean = 0.054847ms;      mean = 0.026702ms;
                        stdDev = 27.862112ms;   stdDev = 0.324031ms;    stdDev = 0.130493ms;
                        Speed = 119.32809 /sec  Speed = 18232.512 /sec  Speed = 37450.207 /sec
                        Rate = 0.7159685 MB/s   Rate = 109.395065 MB/s  Rate = 224.70125 MB/s

Create (incrementally)
                        nCalls = 25000;         nCalls = 25000;         nCalls = 25000;
                        sum = 0.570172s;        sum = 0.755783s;        sum = 3.969259s;
                        min = 0.0ms;            min = 0.0ms;            min = 0.093ms;
                        max = 4.148ms;          max = 23.108ms;         max = 13.452ms;
                        mean = 0.022806ms;      mean = 0.030231ms;      mean = 0.15877ms;
                        stdDev = 0.060237ms;    stdDev = 0.196128ms;    stdDev = 0.192234ms;
                        Speed = 43846.414 /sec  Speed = 33078.277 /sec  Speed = 6298.405 /sec
                        Rate = 263.0785 MB/s    Rate = 198.46967 MB/s   Rate = 37.79043 MB/s

org.apache.mahout.common.distance.CosineDistanceMeasure
                        nCalls = 25000;         nCalls = 25000;         nCalls = 25000;
                        sum = 500.69893s;       sum = 29.026116s;       sum = 3.367885s;
                        min = 16.147ms;         min = 0.896ms;          min = 0.086ms;
                        max = 163.619ms;        max = 10.819ms;         max = 11.731ms;
                        mean = 20.027957ms;     mean = 1.161044ms;      mean = 0.134715ms;
                        stdDev = 4.146275ms;    stdDev = 0.345399ms;    stdDev = 0.092807ms;
                        Speed = 49.930202 /sec  Speed = 861.29333 /sec  Speed = 7423.056 /sec
                        Rate = 0.2995812 MB/s   Rate = 5.16776 MB/s     Rate = 44.538334 MB/s

org.apache.mahout.common.distance.EuclideanDistanceMeasure
                        nCalls = 25000;         nCalls = 25000;         nCalls = 25000;
                        sum = 501.080023s;      sum = 26.812884s;       sum = 3.649897s;
                        min = 17.011ms;         min = 0.924ms;          min = 0.086ms;
                        max = 120.138ms;        max = 9.692ms;          max = 13.113ms;
                        mean = 20.0432ms;       mean = 1.072515ms;      mean = 0.145995ms;
                        stdDev = 4.410452ms;    stdDev = 0.262769ms;    stdDev = 0.192273ms;
                        Speed = 49.89223 /sec   Speed = 932.3876 /sec   Speed = 6849.5083 /sec
                        Rate = 0.29935336 MB/s  Rate = 5.594325 MB/s    Rate = 41.09705 MB/s

org.apache.mahout.common.distance.ManhattanDistanceMeasure
                        nCalls = 25000;         nCalls = 25000;         nCalls = 25000;
                        sum = 2028.848082s;     sum = 267.83595s;       sum = 225.853682s;
                        min = 72.849ms;         min = 8.51ms;           min = 6.922ms;
                        max = 634.243ms;        max = 440.161ms;        max = 55.592ms;
                        mean = 81.153923ms;     mean = 10.713438ms;     mean = 9.034147ms;
                        stdDev = 10.933123ms;   stdDev = 3.517247ms;    stdDev = 2.359102ms;
                        Speed = 12.322264 /sec  Speed = 93.34072 /sec   Speed = 110.69113 /sec
                        Rate = 0.07393358 MB/s  Rate = 0.5600443 MB/s   Rate = 0.6641468 MB/s

org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure
                        nCalls = 25000;         nCalls = 25000;         nCalls = 25000;
                        sum = 501.964767s;      sum = 26.779795s;       sum = 3.799706s;
                        min = 16.936ms;         min = 0.92ms;           min = 0.086ms;
                        max = 445.578ms;        max = 16.946ms;         max = 27.097ms;
                        mean = 20.07859ms;      mean = 1.071191ms;      mean = 0.151988ms;
                        stdDev = 4.898891ms;    stdDev = 0.308015ms;    stdDev = 0.305723ms;
                        Speed = 49.80429 /sec   Speed = 933.5396 /sec   Speed = 6579.456 /sec
                        Rate = 0.29882574 MB/s  Rate = 5.6012373 MB/s   Rate = 39.476734 MB/s

org.apache.mahout.common.distance.TanimotoDistanceMeasure
                        nCalls = 25000;         nCalls = 25000;         nCalls = 25000;
                        sum = 491.263306s;      sum = 26.216746s;       sum = 3.356178s;
                        min = 16.735ms;         min = 0.92ms;           min = 0.086ms;
                        max = 122.37ms;         max = 11.92ms;          max = 5.274ms;
                        mean = 19.650532ms;     mean = 1.048669ms;      mean = 0.134247ms;
                        stdDev = 3.963278ms;    stdDev = 0.166102ms;    stdDev = 0.040766ms;
                        Speed = 50.88921 /sec   Speed = 953.5889 /sec   Speed = 7448.9497 /sec
                        Rate = 0.30533522 MB/s  Rate = 5.7215333 MB/s   Rate = 44.693695 MB/s
{code}

> Solve performance issues with Vector Implementations
> ----------------------------------------------------
>
>                 Key: MAHOUT-300
>                 URL: https://issues.apache.org/jira/browse/MAHOUT-300
>             Project: Mahout
>          Issue Type: Improvement
>    Affects Versions: 0.3
>            Reporter: Robin Anil
>             Fix For: 0.3
>
>         Attachments: MAHOUT-300.patch, MAHOUT-300.patch, MAHOUT-300.patch, 
> MAHOUT-300.patch, MAHOUT-300.patch, MAHOUT-300.patch, MAHOUT-300.patch, 
> MAHOUT-300.patch, MAHOUT-300.patch
>
>
> AbstractVector operations like times
>   public Vector times(double x) {
>     Vector result = clone();
>     Iterator<Element> iter = iterateNonZero();
>     while (iter.hasNext()) {
>       Element element = iter.next();
>       int index = element.index();
>       result.setQuick(index, element.get() * x);
>     }
>     return result;
>   }
> should be implemented as follows
>  public Vector times(double x) {
>     Vector result = clone();
>     Iterator<Element> iter = result.iterateNonZero();
>     while (iter.hasNext()) {
>       Element element = iter.next();
>       element.set(element.get() * x);
>     }
>     return result;
>   }

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.

Reply via email to