This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch experimental/use-fma-in-arraymath
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit cf6492218b648a71e240dba5ac9faba0f2339cb0
Author: Martin Wiesner <martin.wies...@hs-heilbronn.de>
AuthorDate: Tue Jul 8 19:09:33 2025 +0200

    initial fma impl for ArrayMath#innerProduct for experiments
---
 .../src/main/java/opennlp/tools/ml/ArrayMath.java  | 48 ++++++++++++++++++++--
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/opennlp-api/src/main/java/opennlp/tools/ml/ArrayMath.java b/opennlp-api/src/main/java/opennlp/tools/ml/ArrayMath.java
index 0d478f8d..a3471838 100644
--- a/opennlp-api/src/main/java/opennlp/tools/ml/ArrayMath.java
+++ b/opennlp-api/src/main/java/opennlp/tools/ml/ArrayMath.java
@@ -26,15 +26,55 @@ import opennlp.tools.ml.model.Context;
  */
 public class ArrayMath {
 
+  private static final String OS_NAME = System.getProperty("os.name", "Unknown");
+  private static final String OS_ARCH = System.getProperty("os.arch", "Unknown");
+  private static final boolean MAC_OS_X = OS_NAME.startsWith("Mac OS X");
+
+  private static boolean hasHWVectorFMA() {
+    // aarch64 has hw fma, but not on silicon
+    if (OS_ARCH.equals("aarch64") && !MAC_OS_X) {
+      return true;
+    }
+    // intel et al. support it nowadays
+    if (OS_ARCH.equals("amd64")) {
+      return true;
+    }
+    // otherwise
+    return false;
+  }
+
   public static double innerProduct(double[] vecA, double[] vecB) {
     if (vecA == null || vecB == null || vecA.length != vecB.length)
       return Double.NaN;
 
-    double product = 0.0;
-    for (int i = 0; i < vecA.length; i++) {
-      product += vecA[i] * vecB[i];
+    if (hasHWVectorFMA()) {
+      double product = 0;
+      int i = 0;
+
+      // unroll, in case the arrays are large enough
+      if (vecA.length > 32) {
+        double acc1 = 0, acc2 = 0, acc3 = 0, acc4 = 0;
+        int upperBound = vecA.length & ~(4 - 1);
+        for (; i < upperBound; i += 4) {
+          acc1 = StrictMath.fma(vecA[i], vecB[i], acc1);
+          acc2 = StrictMath.fma(vecA[i + 1], vecB[i + 1], acc2);
+          acc3 = StrictMath.fma(vecA[i + 2], vecB[i + 2], acc3);
+          acc4 = StrictMath.fma(vecA[i + 3], vecB[i + 3], acc4);
+        }
+        product += acc1 + acc2 + acc3 + acc4;
+      }
+
+      for (; i < vecA.length; i++) {
+        product = StrictMath.fma(vecA[i], vecB[i], product);
+      }
+      return product;
+    } else {
+      double product = 0.0;
+      for (int i = 0; i < vecA.length; i++) {
+        product += vecA[i] * vecB[i];
+      }
+      return product;
     }
-    return product;
   }
 
   /**

Reply via email to