Repository: incubator-systemml
Updated Branches:
  refs/heads/master 2b7fdb2b3 -> da3187392


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java 
b/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
index c14e3bf..70e776a 100644
--- 
a/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
+++ 
b/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
@@ -1,383 +1,383 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress.utils;
-
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-
-/**
- * Various low-level primitives for compressed matrix blocks, some of which
- * were copied from LibMatrixMult.
- * 
- */
-public class LinearAlgebraUtils {
-
-       /**
-        * 
-        * @param a
-        * @param b
-        * @param len
-        * @return
-        */
-       public static double dotProduct(double[] a, double[] b, final int len) 
-       {
-               double val = 0;
-               final int bn = len % 8;
-
-               // compute rest
-               for (int i = 0; i < bn; i++)
-                       val += a[i] * b[i];
-
-               // unrolled 8-block (for better instruction-level parallelism)
-               for (int i = bn; i < len; i += 8) {
-                       // read 64B cachelines of a and b
-                       // compute cval' = sum(a * b) + cval
-                       val += a[i + 0] * b[i + 0] 
-                                + a[i + 1] * b[i + 1] 
-                                + a[i + 2] * b[i + 2] 
-                                + a[i + 3] * b[i + 3] 
-                                + a[i + 4] * b[i + 4]
-                                + a[i + 5] * b[i + 5] 
-                                + a[i + 6] * b[i + 6] 
-                                + a[i + 7] * b[i + 7];
-               }
-
-               // scalar result
-               return val;
-       }
-       
-       /**
-        * 
-        * @param a
-        * @param b
-        * @param ai
-        * @param bi
-        * @param len
-        * @return
-        */
-       public static double dotProduct( double[] a, double[] b, int ai, int 
bi, final int len )
-       {
-               double val = 0;
-               final int bn = len%8;
-                               
-               //compute rest
-               for( int i = 0; i < bn; i++, ai++, bi++ )
-                       val += a[ ai ] * b[ bi ];
-               
-               //unrolled 8-block (for better instruction-level parallelism)
-               for( int i = bn; i < len; i+=8, ai+=8, bi+=8 )
-               {
-                       //read 64B cachelines of a and b
-                       //compute cval' = sum(a * b) + cval
-                       val += a[ ai+0 ] * b[ bi+0 ]
-                            + a[ ai+1 ] * b[ bi+1 ]
-                            + a[ ai+2 ] * b[ bi+2 ]
-                            + a[ ai+3 ] * b[ bi+3 ]
-                            + a[ ai+4 ] * b[ bi+4 ]
-                            + a[ ai+5 ] * b[ bi+5 ]
-                            + a[ ai+6 ] * b[ bi+6 ]
-                            + a[ ai+7 ] * b[ bi+7 ];
-               }
-               
-               //scalar result
-               return val; 
-       }
-       
-       /**
-        * 
-        * @param a
-        * @param c
-        * @param ai
-        * @param ci
-        * @param len
-        */
-       public static void vectAdd( double[] a, double[] c, int ai, int ci, 
final int len )
-       {
-               final int bn = len%8;
-               
-               //rest, not aligned to 8-blocks
-               for( int j = 0; j < bn; j++, ai++, ci++)
-                       c[ ci ] += a[ ai ];
-               
-               //unrolled 8-block  (for better instruction-level parallelism)
-               for( int j = bn; j < len; j+=8, ai+=8, ci+=8) 
-               {
-                       //read 64B cachelines of a and c
-                       //compute c' = c * a
-                       //write back 64B cacheline of c = c'
-                       c[ ci+0 ] += a[ ai+0 ];
-                       c[ ci+1 ] += a[ ai+1 ];
-                       c[ ci+2 ] += a[ ai+2 ];
-                       c[ ci+3 ] += a[ ai+3 ];
-                       c[ ci+4 ] += a[ ai+4 ];
-                       c[ ci+5 ] += a[ ai+5 ];
-                       c[ ci+6 ] += a[ ai+6 ];
-                       c[ ci+7 ] += a[ ai+7 ];
-               }
-       }
-       
-       /**
-        * 
-        * @param aval
-        * @param b
-        * @param c
-        * @param bix
-        * @param ci
-        * @param len
-        */
-       public static void vectAdd( final double aval, double[] c, char[] bix, 
final int bi, final int ci, final int len )
-       {
-               final int bn = len%8;
-               
-               //rest, not aligned to 8-blocks
-               for( int j = bi; j < bi+bn; j++ )
-                       c[ ci + bix[j] ] += aval;
-               
-               //unrolled 8-block (for better instruction-level parallelism)
-               for( int j = bi+bn; j < bi+len; j+=8 )
-               {
-                       c[ ci+bix[j+0] ] += aval;
-                       c[ ci+bix[j+1] ] += aval;
-                       c[ ci+bix[j+2] ] += aval;
-                       c[ ci+bix[j+3] ] += aval;
-                       c[ ci+bix[j+4] ] += aval;
-                       c[ ci+bix[j+5] ] += aval;
-                       c[ ci+bix[j+6] ] += aval;
-                       c[ ci+bix[j+7] ] += aval;
-               }
-       }
-       
-       /**
-        * 
-        * @param aval
-        * @param c
-        * @param ci
-        * @param len
-        */
-       public static void vectAdd( final double aval, double[] c, final int 
ci, final int len )
-       {
-               final int bn = len%8;
-               
-               //rest, not aligned to 8-blocks
-               for( int j = 0; j < bn; j++ )
-                       c[ ci + j ] += aval;
-               
-               //unrolled 8-block (for better instruction-level parallelism)
-               for( int j = bn; j < len; j+=8 )
-               {
-                       c[ ci+j+0 ] += aval;
-                       c[ ci+j+1 ] += aval;
-                       c[ ci+j+2 ] += aval;
-                       c[ ci+j+3 ] += aval;
-                       c[ ci+j+4 ] += aval;
-                       c[ ci+j+5 ] += aval;
-                       c[ ci+j+6 ] += aval;
-                       c[ ci+j+7 ] += aval;
-               }
-       }
-       
-       /**
-        * 
-        * @param aval
-        * @param b
-        * @param c
-        * @param bix
-        * @param bi
-        * @param ci
-        * @param len
-        */
-       public static void vectMultiplyAdd( final double aval, double[] b, 
double[] c, int[] bix, final int bi, final int ci, final int len )
-       {
-               final int bn = (len-bi)%8;
-               
-               //rest, not aligned to 8-blocks
-               for( int j = bi; j < bi+bn; j++ )
-                       c[ ci + bix[j] ] += aval * b[ j ];
-               
-               //unrolled 8-block (for better instruction-level parallelism)
-               for( int j = bi+bn; j < len; j+=8 )
-               {
-                       c[ ci+bix[j+0] ] += aval * b[ j+0 ];
-                       c[ ci+bix[j+1] ] += aval * b[ j+1 ];
-                       c[ ci+bix[j+2] ] += aval * b[ j+2 ];
-                       c[ ci+bix[j+3] ] += aval * b[ j+3 ];
-                       c[ ci+bix[j+4] ] += aval * b[ j+4 ];
-                       c[ ci+bix[j+5] ] += aval * b[ j+5 ];
-                       c[ ci+bix[j+6] ] += aval * b[ j+6 ];
-                       c[ ci+bix[j+7] ] += aval * b[ j+7 ];
-               }
-       }
-       
-       /**
-        * 
-        * @param aval
-        * @param b
-        * @param c
-        * @param bi
-        * @param ci
-        * @param len
-        */
-       public static void vectMultiplyAdd( final double aval, double[] b, 
double[] c, int bi, int ci, final int len )
-       {
-               final int bn = len%8;
-               
-               //rest, not aligned to 8-blocks
-               for( int j = 0; j < bn; j++, bi++, ci++)
-                       c[ ci ] += aval * b[ bi ];
-               
-               //unrolled 8-block  (for better instruction-level parallelism)
-               for( int j = bn; j < len; j+=8, bi+=8, ci+=8) 
-               {
-                       //read 64B cachelines of b and c
-                       //compute c' = aval * b + c
-                       //write back 64B cacheline of c = c'
-                       c[ ci+0 ] += aval * b[ bi+0 ];
-                       c[ ci+1 ] += aval * b[ bi+1 ];
-                       c[ ci+2 ] += aval * b[ bi+2 ];
-                       c[ ci+3 ] += aval * b[ bi+3 ];
-                       c[ ci+4 ] += aval * b[ bi+4 ];
-                       c[ ci+5 ] += aval * b[ bi+5 ];
-                       c[ ci+6 ] += aval * b[ bi+6 ];
-                       c[ ci+7 ] += aval * b[ bi+7 ];
-               }
-       }
-
-       /**
-        * 
-        * @param a
-        * @param aix
-        * @param ai
-        * @param ai2
-        * @param len
-        * @return
-        */
-       public static double vectSum( double[] a, char[] bix, final int ai, 
final int bi, final int len )
-       {
-               double val = 0;
-               final int bn = len%8;
-               
-               //rest, not aligned to 8-blocks
-               for( int j = bi; j < bi+bn; j++ )
-                       val += a[ ai + bix[j] ];
-               
-               //unrolled 8-block (for better instruction-level parallelism)
-               for( int j = bi+bn; j < bi+len; j+=8 )
-               {
-                       val += a[ ai+bix[j+0] ]
-                            + a[ ai+bix[j+1] ]
-                            + a[ ai+bix[j+2] ]
-                            + a[ ai+bix[j+3] ]
-                            + a[ ai+bix[j+4] ]
-                            + a[ ai+bix[j+5] ]
-                            + a[ ai+bix[j+6] ]
-                            + a[ ai+bix[j+7] ];
-               }
-               
-               return val;
-       }
-       
-       /**
-        * 
-        * @param a
-        * @param ai
-        * @param len
-        * @return
-        */
-       public static double vectSum( double[] a, int ai, final int len )
-       {
-               double val = 0;
-               final int bn = len%8;
-               
-               //rest, not aligned to 8-blocks
-               for( int j = 0; j < bn; j++, ai++ )
-                       val += a[ ai ];
-               
-               //unrolled 8-block (for better instruction-level parallelism)
-               for( int j = bn; j < len; j+=8, ai+=8 )
-               {
-                       val += a[ ai+0 ]
-                            + a[ ai+1 ]
-                            + a[ ai+2 ]
-                            + a[ ai+3 ]
-                            + a[ ai+4 ]
-                            + a[ ai+5 ]
-                            + a[ ai+6 ]
-                            + a[ ai+7 ];
-               }
-               
-               return val;
-       }
-       
-       /**
-        * 
-        * @param ret
-        */
-       public static void copyUpperToLowerTriangle( MatrixBlock ret )
-       {
-               double[] c = ret.getDenseBlock();
-               final int m = ret.getNumRows();
-               final int n = ret.getNumColumns();
-               
-               //copy symmetric values
-               for( int i=0, uix=0; i<m; i++, uix+=n )
-                       for( int j=i+1, lix=j*n+i; j<n; j++, lix+=n )
-                               c[ lix ] = c[ uix+j ];
-       }
-       
-       /**
-        * 
-        * @param ret
-        * @param tmp
-        * @param ix
-        */
-       public static void copyNonZerosToRowCol( MatrixBlock ret, MatrixBlock 
tmp, int ix )
-       {
-               for(int i=0; i<tmp.getNumColumns(); i++) {
-                       double val = tmp.quickGetValue(0, i);
-                       if( val != 0 ) {
-                               ret.setValueDenseUnsafe(ix, i, val);
-                               ret.setValueDenseUnsafe(i, ix, val);
-                       }
-               }
-       }
-       
-       /**
-        * 
-        * @param a
-        * @param x
-        * @return the index of the closest element in a to the value x
-        */
-       public static int getClosestK(int[] a, int x) {
-
-               int low = 0;
-               int high = a.length - 1;
-
-               while (low < high) {
-                       int mid = (low + high) / 2;
-                       int d1 = Math.abs(a[mid] - x);
-                       int d2 = Math.abs(a[mid + 1] - x);
-                       if (d2 <= d1) {
-                               low = mid + 1;
-                       } else {
-                               high = mid;
-                       }
-               }
-               return high;
-       }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.utils;
+
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Various low-level primitives for compressed matrix blocks, some of which
+ * were copied from LibMatrixMult.
+ * 
+ */
+public class LinearAlgebraUtils {
+
+       /**
+        * 
+        * @param a
+        * @param b
+        * @param len
+        * @return
+        */
+       public static double dotProduct(double[] a, double[] b, final int len) 
+       {
+               double val = 0;
+               final int bn = len % 8;
+
+               // compute rest
+               for (int i = 0; i < bn; i++)
+                       val += a[i] * b[i];
+
+               // unrolled 8-block (for better instruction-level parallelism)
+               for (int i = bn; i < len; i += 8) {
+                       // read 64B cachelines of a and b
+                       // compute cval' = sum(a * b) + cval
+                       val += a[i + 0] * b[i + 0] 
+                                + a[i + 1] * b[i + 1] 
+                                + a[i + 2] * b[i + 2] 
+                                + a[i + 3] * b[i + 3] 
+                                + a[i + 4] * b[i + 4]
+                                + a[i + 5] * b[i + 5] 
+                                + a[i + 6] * b[i + 6] 
+                                + a[i + 7] * b[i + 7];
+               }
+
+               // scalar result
+               return val;
+       }
+       
+       /**
+        * 
+        * @param a
+        * @param b
+        * @param ai
+        * @param bi
+        * @param len
+        * @return
+        */
+       public static double dotProduct( double[] a, double[] b, int ai, int 
bi, final int len )
+       {
+               double val = 0;
+               final int bn = len%8;
+                               
+               //compute rest
+               for( int i = 0; i < bn; i++, ai++, bi++ )
+                       val += a[ ai ] * b[ bi ];
+               
+               //unrolled 8-block (for better instruction-level parallelism)
+               for( int i = bn; i < len; i+=8, ai+=8, bi+=8 )
+               {
+                       //read 64B cachelines of a and b
+                       //compute cval' = sum(a * b) + cval
+                       val += a[ ai+0 ] * b[ bi+0 ]
+                            + a[ ai+1 ] * b[ bi+1 ]
+                            + a[ ai+2 ] * b[ bi+2 ]
+                            + a[ ai+3 ] * b[ bi+3 ]
+                            + a[ ai+4 ] * b[ bi+4 ]
+                            + a[ ai+5 ] * b[ bi+5 ]
+                            + a[ ai+6 ] * b[ bi+6 ]
+                            + a[ ai+7 ] * b[ bi+7 ];
+               }
+               
+               //scalar result
+               return val; 
+       }
+       
+       /**
+        * 
+        * @param a
+        * @param c
+        * @param ai
+        * @param ci
+        * @param len
+        */
+       public static void vectAdd( double[] a, double[] c, int ai, int ci, 
final int len )
+       {
+               final int bn = len%8;
+               
+               //rest, not aligned to 8-blocks
+               for( int j = 0; j < bn; j++, ai++, ci++)
+                       c[ ci ] += a[ ai ];
+               
+               //unrolled 8-block  (for better instruction-level parallelism)
+               for( int j = bn; j < len; j+=8, ai+=8, ci+=8) 
+               {
+                       //read 64B cachelines of a and c
+                       //compute c' = c * a
+                       //write back 64B cacheline of c = c'
+                       c[ ci+0 ] += a[ ai+0 ];
+                       c[ ci+1 ] += a[ ai+1 ];
+                       c[ ci+2 ] += a[ ai+2 ];
+                       c[ ci+3 ] += a[ ai+3 ];
+                       c[ ci+4 ] += a[ ai+4 ];
+                       c[ ci+5 ] += a[ ai+5 ];
+                       c[ ci+6 ] += a[ ai+6 ];
+                       c[ ci+7 ] += a[ ai+7 ];
+               }
+       }
+       
+       /**
+        * 
+        * @param aval
+        * @param b
+        * @param c
+        * @param bix
+        * @param ci
+        * @param len
+        */
+       public static void vectAdd( final double aval, double[] c, char[] bix, 
final int bi, final int ci, final int len )
+       {
+               final int bn = len%8;
+               
+               //rest, not aligned to 8-blocks
+               for( int j = bi; j < bi+bn; j++ )
+                       c[ ci + bix[j] ] += aval;
+               
+               //unrolled 8-block (for better instruction-level parallelism)
+               for( int j = bi+bn; j < bi+len; j+=8 )
+               {
+                       c[ ci+bix[j+0] ] += aval;
+                       c[ ci+bix[j+1] ] += aval;
+                       c[ ci+bix[j+2] ] += aval;
+                       c[ ci+bix[j+3] ] += aval;
+                       c[ ci+bix[j+4] ] += aval;
+                       c[ ci+bix[j+5] ] += aval;
+                       c[ ci+bix[j+6] ] += aval;
+                       c[ ci+bix[j+7] ] += aval;
+               }
+       }
+       
+       /**
+        * 
+        * @param aval
+        * @param c
+        * @param ci
+        * @param len
+        */
+       public static void vectAdd( final double aval, double[] c, final int 
ci, final int len )
+       {
+               final int bn = len%8;
+               
+               //rest, not aligned to 8-blocks
+               for( int j = 0; j < bn; j++ )
+                       c[ ci + j ] += aval;
+               
+               //unrolled 8-block (for better instruction-level parallelism)
+               for( int j = bn; j < len; j+=8 )
+               {
+                       c[ ci+j+0 ] += aval;
+                       c[ ci+j+1 ] += aval;
+                       c[ ci+j+2 ] += aval;
+                       c[ ci+j+3 ] += aval;
+                       c[ ci+j+4 ] += aval;
+                       c[ ci+j+5 ] += aval;
+                       c[ ci+j+6 ] += aval;
+                       c[ ci+j+7 ] += aval;
+               }
+       }
+       
+       /**
+        * 
+        * @param aval
+        * @param b
+        * @param c
+        * @param bix
+        * @param bi
+        * @param ci
+        * @param len
+        */
+       public static void vectMultiplyAdd( final double aval, double[] b, 
double[] c, int[] bix, final int bi, final int ci, final int len )
+       {
+               final int bn = (len-bi)%8;
+               
+               //rest, not aligned to 8-blocks
+               for( int j = bi; j < bi+bn; j++ )
+                       c[ ci + bix[j] ] += aval * b[ j ];
+               
+               //unrolled 8-block (for better instruction-level parallelism)
+               for( int j = bi+bn; j < len; j+=8 )
+               {
+                       c[ ci+bix[j+0] ] += aval * b[ j+0 ];
+                       c[ ci+bix[j+1] ] += aval * b[ j+1 ];
+                       c[ ci+bix[j+2] ] += aval * b[ j+2 ];
+                       c[ ci+bix[j+3] ] += aval * b[ j+3 ];
+                       c[ ci+bix[j+4] ] += aval * b[ j+4 ];
+                       c[ ci+bix[j+5] ] += aval * b[ j+5 ];
+                       c[ ci+bix[j+6] ] += aval * b[ j+6 ];
+                       c[ ci+bix[j+7] ] += aval * b[ j+7 ];
+               }
+       }
+       
+       /**
+        * 
+        * @param aval
+        * @param b
+        * @param c
+        * @param bi
+        * @param ci
+        * @param len
+        */
+       public static void vectMultiplyAdd( final double aval, double[] b, 
double[] c, int bi, int ci, final int len )
+       {
+               final int bn = len%8;
+               
+               //rest, not aligned to 8-blocks
+               for( int j = 0; j < bn; j++, bi++, ci++)
+                       c[ ci ] += aval * b[ bi ];
+               
+               //unrolled 8-block  (for better instruction-level parallelism)
+               for( int j = bn; j < len; j+=8, bi+=8, ci+=8) 
+               {
+                       //read 64B cachelines of b and c
+                       //compute c' = aval * b + c
+                       //write back 64B cacheline of c = c'
+                       c[ ci+0 ] += aval * b[ bi+0 ];
+                       c[ ci+1 ] += aval * b[ bi+1 ];
+                       c[ ci+2 ] += aval * b[ bi+2 ];
+                       c[ ci+3 ] += aval * b[ bi+3 ];
+                       c[ ci+4 ] += aval * b[ bi+4 ];
+                       c[ ci+5 ] += aval * b[ bi+5 ];
+                       c[ ci+6 ] += aval * b[ bi+6 ];
+                       c[ ci+7 ] += aval * b[ bi+7 ];
+               }
+       }
+
+       /**
+        * 
+        * @param a
+        * @param aix
+        * @param ai
+        * @param ai2
+        * @param len
+        * @return
+        */
+       public static double vectSum( double[] a, char[] bix, final int ai, 
final int bi, final int len )
+       {
+               double val = 0;
+               final int bn = len%8;
+               
+               //rest, not aligned to 8-blocks
+               for( int j = bi; j < bi+bn; j++ )
+                       val += a[ ai + bix[j] ];
+               
+               //unrolled 8-block (for better instruction-level parallelism)
+               for( int j = bi+bn; j < bi+len; j+=8 )
+               {
+                       val += a[ ai+bix[j+0] ]
+                            + a[ ai+bix[j+1] ]
+                            + a[ ai+bix[j+2] ]
+                            + a[ ai+bix[j+3] ]
+                            + a[ ai+bix[j+4] ]
+                            + a[ ai+bix[j+5] ]
+                            + a[ ai+bix[j+6] ]
+                            + a[ ai+bix[j+7] ];
+               }
+               
+               return val;
+       }
+       
+       /**
+        * 
+        * @param a
+        * @param ai
+        * @param len
+        * @return
+        */
+       public static double vectSum( double[] a, int ai, final int len )
+       {
+               double val = 0;
+               final int bn = len%8;
+               
+               //rest, not aligned to 8-blocks
+               for( int j = 0; j < bn; j++, ai++ )
+                       val += a[ ai ];
+               
+               //unrolled 8-block (for better instruction-level parallelism)
+               for( int j = bn; j < len; j+=8, ai+=8 )
+               {
+                       val += a[ ai+0 ]
+                            + a[ ai+1 ]
+                            + a[ ai+2 ]
+                            + a[ ai+3 ]
+                            + a[ ai+4 ]
+                            + a[ ai+5 ]
+                            + a[ ai+6 ]
+                            + a[ ai+7 ];
+               }
+               
+               return val;
+       }
+       
+       /**
+        * 
+        * @param ret
+        */
+       public static void copyUpperToLowerTriangle( MatrixBlock ret )
+       {
+               double[] c = ret.getDenseBlock();
+               final int m = ret.getNumRows();
+               final int n = ret.getNumColumns();
+               
+               //copy symmetric values
+               for( int i=0, uix=0; i<m; i++, uix+=n )
+                       for( int j=i+1, lix=j*n+i; j<n; j++, lix+=n )
+                               c[ lix ] = c[ uix+j ];
+       }
+       
+       /**
+        * 
+        * @param ret
+        * @param tmp
+        * @param ix
+        */
+       public static void copyNonZerosToRowCol( MatrixBlock ret, MatrixBlock 
tmp, int ix )
+       {
+               for(int i=0; i<tmp.getNumColumns(); i++) {
+                       double val = tmp.quickGetValue(0, i);
+                       if( val != 0 ) {
+                               ret.setValueDenseUnsafe(ix, i, val);
+                               ret.setValueDenseUnsafe(i, ix, val);
+                       }
+               }
+       }
+       
+       /**
+        * 
+        * @param a
+        * @param x
+        * @return the index of the closest element in a to the value x
+        */
+       public static int getClosestK(int[] a, int x) {
+
+               int low = 0;
+               int high = a.length - 1;
+
+               while (low < high) {
+                       int mid = (low + high) / 2;
+                       int d1 = Math.abs(a[mid] - x);
+                       int d2 = Math.abs(a[mid + 1] - x);
+                       if (d2 <= d1) {
+                               low = mid + 1;
+                       } else {
+                               high = mid;
+                       }
+               }
+               return high;
+       }
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/instructions/cp/PlusMultCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/PlusMultCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/PlusMultCPInstruction.java
index 7cb75e5..212e0b7 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/PlusMultCPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/PlusMultCPInstruction.java
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 package org.apache.sysml.runtime.instructions.cp;
 
 import org.apache.sysml.runtime.DMLRuntimeException;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/test/scripts/functions/compress/LinregCG.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/compress/LinregCG.R 
b/src/test/scripts/functions/compress/LinregCG.R
index 1d69385..5dcad95 100644
--- a/src/test/scripts/functions/compress/LinregCG.R
+++ b/src/test/scripts/functions/compress/LinregCG.R
@@ -19,39 +19,39 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X = readMM(paste(args[1], "X.mtx", sep=""))
-y = readMM(paste(args[1], "y.mtx", sep=""))
-
-intercept = as.integer(args[2]);
-eps = as.double(args[3]);
-maxiter = as.double(args[4]);
-
-if( intercept == 1 ){
-   ones = matrix(1, nrow(X), 1); 
-   X = cbind(X, ones);
-}
-
-r = -(t(X) %*% y);
-p = -r;
-norm_r2 = sum(r * r);
-w = matrix(0, ncol(X), 1);
-
-i = 0;
-while(i < maxiter) {
-       q = ((t(X) %*% (X %*% p)) + eps  * p);
-       alpha = norm_r2 / ((t(p) %*% q)[1:1]);
-       w = w + alpha * p;
-       old_norm_r2 = norm_r2;
-       r = r + alpha * q;
-       norm_r2 = sum(r * r);
-       beta = norm_r2 / old_norm_r2;
-       p = -r + beta * p;
-       i = i + 1;
-}
-
-writeMM(as(w,"CsparseMatrix"), paste(args[5], "w", sep=""))
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = readMM(paste(args[1], "X.mtx", sep=""))
+y = readMM(paste(args[1], "y.mtx", sep=""))
+
+intercept = as.integer(args[2]);
+eps = as.double(args[3]);
+maxiter = as.double(args[4]);
+
+if( intercept == 1 ){
+   ones = matrix(1, nrow(X), 1); 
+   X = cbind(X, ones);
+}
+
+r = -(t(X) %*% y);
+p = -r;
+norm_r2 = sum(r * r);
+w = matrix(0, ncol(X), 1);
+
+i = 0;
+while(i < maxiter) {
+       q = ((t(X) %*% (X %*% p)) + eps  * p);
+       alpha = norm_r2 / ((t(p) %*% q)[1:1]);
+       w = w + alpha * p;
+       old_norm_r2 = norm_r2;
+       r = r + alpha * q;
+       norm_r2 = sum(r * r);
+       beta = norm_r2 / old_norm_r2;
+       p = -r + beta * p;
+       i = i + 1;
+}
+
+writeMM(as(w,"CsparseMatrix"), paste(args[5], "w", sep=""))

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/test/scripts/functions/compress/LinregCG.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/compress/LinregCG.dml 
b/src/test/scripts/functions/compress/LinregCG.dml
index 02d0fad..85a66e4 100644
--- a/src/test/scripts/functions/compress/LinregCG.dml
+++ b/src/test/scripts/functions/compress/LinregCG.dml
@@ -19,38 +19,38 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-y = read($2);
-intercept = $3;
-eps = $4;
-maxiter = $5;
-
-if( intercept == 1 ){
-   ones = matrix(1, nrow(X), 1); 
-   X = append(X, ones);
-}
-
-r = -(t(X) %*% y);
-p = -r;
-norm_r2 = sum(r * r);
-w = matrix(0, rows = ncol(X), cols = 1);
-
-i = 0;
-while(i < maxiter) {
-       q = ((t(X) %*% (X %*% p)) + eps  * p);
-       alpha = norm_r2 / castAsScalar(t(p) %*% q);
-       w = w + alpha * p;
-       old_norm_r2 = norm_r2;
-       r = r + alpha * q;
-       norm_r2 = sum(r * r);
-       beta = norm_r2 / old_norm_r2;
-       p = -r + beta * p;
-       i = i + 1;
-}
-
-write(w, $6);
-
-
-
-
+
+X = read($1);
+y = read($2);
+intercept = $3;
+eps = $4;
+maxiter = $5;
+
+if( intercept == 1 ){
+   ones = matrix(1, nrow(X), 1); 
+   X = append(X, ones);
+}
+
+r = -(t(X) %*% y);
+p = -r;
+norm_r2 = sum(r * r);
+w = matrix(0, rows = ncol(X), cols = 1);
+
+i = 0;
+while(i < maxiter) {
+       q = ((t(X) %*% (X %*% p)) + eps  * p);
+       alpha = norm_r2 / castAsScalar(t(p) %*% q);
+       w = w + alpha * p;
+       old_norm_r2 = norm_r2;
+       r = r + alpha * q;
+       norm_r2 = sum(r * r);
+       beta = norm_r2 / old_norm_r2;
+       p = -r + beta * p;
+       i = i + 1;
+}
+
+write(w, $6);
+
+
+
+

Reply via email to