(systemds) branch main updated: [SYSTEMDS-3920] Vector API for Codegen Outer-Products

mboehm7 Sat, 01 Nov 2025 02:45:56 -0700

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/main by this push:
     new e40bbfec12 [SYSTEMDS-3920] Vector API for Codegen Outer-Products
e40bbfec12 is described below

commit e40bbfec12eb18620f7f18436ca85685356a703a
Author: Hubert Krawczyk <[email protected]>
AuthorDate: Sat Nov 1 10:41:25 2025 +0100

    [SYSTEMDS-3920] Vector API for Codegen Outer-Products
    
    Closes #2349.
---
 .../sysds/runtime/codegen/LibSpoofPrimitives.java   | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java
index bc6ba19895..ebb42676f0 100644
--- a/src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java
@@ -109,27 +109,12 @@ public class LibSpoofPrimitives
                        }
                }
                else {
-                       //rest, not aligned to 4-blocks
-                       final int bn = len1%4;
-                       for( int i=0, cix=ci; i < bn; i++, cix+=len2 )
+                       for( int i=0, cix=ci; i < len1; i++, cix+=len2)
                                if( a[ai+i] != 0 )
                                        LibMatrixMult.vectMultiplyAdd(a[ai+i], 
b, c, bi, cix, len2);
-                       
-                       //unrolled 4-block (for fewer L1-dcache loads)
-                       for( int i=bn, cix=ci+bn*len2; i < len1; i+=4, 
cix+=4*len2 ) {
-                               final int cix1=cix, cix2=cix+len2, 
cix3=cix+2*len2, cix4=cix+3*len2;
-                               final double aval1=a[ai+i], aval2=a[ai+i+1], 
aval3=a[ai+i+2], aval4=a[ai+i+3];
-                               for( int j=0; j<len2; j++ ) {
-                                       final double bval = b[bi+j];
-                                       c[cix1 + j] += aval1 * bval;
-                                       c[cix2 + j] += aval2 * bval;
-                                       c[cix3 + j] += aval3 * bval;
-                                       c[cix4 + j] += aval4 * bval;
-                               }
-                       }       
-               }       
+               }
        }
-       
+
        public static void vectOuterMultAdd(double[] a, double[] b, double[] c, 
int[] aix, int ai, int bi, int ci, int alen, int len1, int len2) {
                if( isFlipOuter(len1, len2) ) {
                        for( int i=0, cix=ci; i < len2; i++, cix+=len1 ) {

(systemds) branch main updated: [SYSTEMDS-3920] Vector API for Codegen Outer-Products

Reply via email to