This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/main by this push:
     new 35b84ae642 Minor improvement over formula plugin (#5689)
35b84ae642 is described below

commit 35b84ae6429bfa14fc6b703a9aaf0dff3e1ab51c
Author: Romain Manni-Bucau <[email protected]>
AuthorDate: Mon Sep 8 15:18:04 2025 +0200

    Minor improvement over formula plugin (#5689)
---
 .../main/java/org/apache/hop/core/row/RowMeta.java |   2 +-
 .../java/org/apache/hop/core/util/StringUtil.java  |  28 +--
 .../hop/pipeline/transforms/formula/Formula.java   |  64 ++++---
 .../pipeline/transforms/formula/FormulaPoi.java    | 162 +++++++++++++++++
 .../formula/util/FormulaFieldsExtractor.java       |  50 ++++++
 .../transforms/formula/util/FormulaParser.java     |  76 ++++----
 .../transforms/formula/util/FormulaParserTest.java | 195 +++++++++++++++++++++
 7 files changed, 505 insertions(+), 72 deletions(-)

diff --git a/core/src/main/java/org/apache/hop/core/row/RowMeta.java 
b/core/src/main/java/org/apache/hop/core/row/RowMeta.java
index 35e517d6d4..27645fa2b6 100644
--- a/core/src/main/java/org/apache/hop/core/row/RowMeta.java
+++ b/core/src/main/java/org/apache/hop/core/row/RowMeta.java
@@ -1314,7 +1314,7 @@ public class RowMeta implements IRowMeta {
       Integer index = mapping.get(name);
       if (index != null) {
         IValueMeta value = metas.get(index);
-        if (!name.equalsIgnoreCase(value.getName())) {
+        if (!name.equals(value.getName())) { // case insensitive since we 
lowercase
           mapping.remove(name);
           index = null;
         }
diff --git a/core/src/main/java/org/apache/hop/core/util/StringUtil.java 
b/core/src/main/java/org/apache/hop/core/util/StringUtil.java
index 920f27272c..424eedc58b 100644
--- a/core/src/main/java/org/apache/hop/core/util/StringUtil.java
+++ b/core/src/main/java/org/apache/hop/core/util/StringUtil.java
@@ -20,7 +20,6 @@ package org.apache.hop.core.util;
 import java.text.DateFormat;
 import java.text.Normalizer;
 import java.text.SimpleDateFormat;
-import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
@@ -151,12 +150,16 @@ public class StringUtil {
       return null;
     }
 
-    StringBuilder buffer = new StringBuilder();
+    // search for opening string
+    int i = aString.indexOf(open);
+    if (i < 0) // no match, no need to instantiate the string builder
+    {
+      return aString;
+    }
 
     String rest = aString;
 
-    // search for opening string
-    int i = rest.indexOf(open);
+    StringBuilder buffer = new StringBuilder();
     while (i > -1) {
       int j = rest.indexOf(close, i + open.length());
       // search for closing string
@@ -251,20 +254,17 @@ public class StringUtil {
    * properties
    *
    * @param aString the string on which to apply the substitution.
-   * @param systemProperties the system properties to use
+   * @param systemProperties the system properties to use (ensure read is 
thread safe in calling
+   *     context)
    * @return the string with the substitution applied.
    */
   public static final synchronized String environmentSubstitute(
       String aString, Map<String, String> systemProperties) {
-    Map<String, String> sysMap = new HashMap<>();
-    synchronized (sysMap) {
-      sysMap.putAll(Collections.synchronizedMap(systemProperties));
-
-      aString = substituteWindows(aString, sysMap);
-      aString = substituteUnix(aString, sysMap);
-      aString = substituteHex(aString);
-      return aString;
-    }
+    // system properties are thread safe normally in our usages
+    aString = substituteWindows(aString, systemProperties);
+    aString = substituteUnix(aString, systemProperties);
+    aString = substituteHex(aString);
+    return aString;
   }
 
   /**
diff --git 
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
 
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
index cfafc94670..925084b7ca 100644
--- 
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
+++ 
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
@@ -21,6 +21,8 @@ import java.io.IOException;
 import java.sql.Timestamp;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
+import java.util.stream.IntStream;
 import org.apache.hop.core.exception.HopException;
 import org.apache.hop.core.exception.HopTransformException;
 import org.apache.hop.core.row.IValueMeta;
@@ -31,42 +33,45 @@ import org.apache.hop.pipeline.Pipeline;
 import org.apache.hop.pipeline.PipelineMeta;
 import org.apache.hop.pipeline.transform.BaseTransform;
 import org.apache.hop.pipeline.transform.TransformMeta;
+import org.apache.hop.pipeline.transforms.formula.util.FormulaFieldsExtractor;
 import org.apache.hop.pipeline.transforms.formula.util.FormulaParser;
 import org.apache.poi.ss.usermodel.CellType;
 import org.apache.poi.ss.usermodel.CellValue;
 import org.apache.poi.ss.usermodel.DateUtil;
-import org.apache.poi.ss.usermodel.Row;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 
 public class Formula extends BaseTransform<FormulaMeta, FormulaData> {
 
-  private XSSFWorkbook workBook;
-  private XSSFSheet workSheet;
-  private Row sheetRow;
-  private HashMap<String, String> replaceMap;
+  private FormulaPoi[] poi;
+  private List<String>[] formulaFieldLists;
+  private final HashMap<String, String> replaceMap = new HashMap<>();
 
   @Override
   public boolean init() {
-
-    workBook = new XSSFWorkbook();
-    workSheet = workBook.createSheet();
-    sheetRow = workSheet.createRow(0);
-    replaceMap = new HashMap<>();
-
     return true;
   }
 
   @Override
   public void dispose() {
-    try {
-      workBook.close();
-    } catch (IOException e) {
-      logError("Unable to close temporary workbook", e);
+    if (poi != null) {
+      for (final var it : poi) {
+        try {
+          it.destroy();
+        } catch (IOException e) {
+          logError("Unable to close temporary workbook", e);
+        }
+      }
     }
     super.dispose();
   }
 
+  /**
+   * Resets every per-formula POI holder at the end of a batch so that the fast (HSSF) workbook
+   * is rebuilt, which keeps its memory usage under control.
+   *
+   * <p>{@code poi} is not assigned in {@link #init()}, so it can still be null when the batch
+   * completes before any row was processed; {@link #dispose()} applies the same guard.
+   *
+   * @throws HopException if the parent implementation fails
+   */
+  @Override
+  public void batchComplete() throws HopException {
+    super.batchComplete();
+    if (poi == null) {
+      return;
+    }
+    for (final var it : poi) {
+      it.reset();
+    }
+  }
+
   @Override
   public boolean processRow() throws HopException {
 
@@ -112,6 +117,18 @@ public class Formula extends BaseTransform<FormulaMeta, 
FormulaData> {
           data.replaceIndex[j] = -1;
         }
       }
+
+      // create one backing row per formula
+      poi =
+          IntStream.range(0, meta.getFormulas().size())
+              .mapToObj(it -> new FormulaPoi(this::logDebug))
+              .toArray(FormulaPoi[]::new);
+      // compute only once for all rows the default field list
+      formulaFieldLists =
+          meta.getFormulas().stream()
+              .map(FormulaMetaFunction::getFormula)
+              .map(FormulaFieldsExtractor::getFormulaFieldList)
+              .toArray(List[]::new);
     }
 
     int tempIndex = getInputRowMeta().size();
@@ -120,11 +137,6 @@ public class Formula extends BaseTransform<FormulaMeta, 
FormulaData> {
       logRowlevel("Read row #" + getLinesRead() + " : " + Arrays.toString(r));
     }
 
-    if (sheetRow != null) {
-      workSheet.removeRow(sheetRow);
-    }
-    sheetRow = workSheet.createRow(0);
-
     Object[] outputRowData = RowDataUtil.resizeArray(r, 
data.outputRowMeta.size());
     Object outputValue = null;
 
@@ -133,7 +145,13 @@ public class Formula extends BaseTransform<FormulaMeta, 
FormulaData> {
       FormulaMetaFunction formula = meta.getFormulas().get(i);
       FormulaParser parser =
           new FormulaParser(
-              formula, data.outputRowMeta, outputRowData, sheetRow, variables, 
replaceMap);
+              formula,
+              data.outputRowMeta,
+              outputRowData,
+              poi[i],
+              variables,
+              replaceMap,
+              formulaFieldLists[i]);
       try {
         CellValue cellValue = parser.getFormulaValue();
         CellType cellType = cellValue.getCellType();
diff --git 
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/FormulaPoi.java
 
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/FormulaPoi.java
new file mode 100644
index 0000000000..d810c5ae88
--- /dev/null
+++ 
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/FormulaPoi.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.pipeline.transforms.formula;
+
+import java.io.IOException;
+import java.util.function.Consumer;
+import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.FormulaEvaluator;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFFormulaEvaluator;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+/**
+ * Thin factory-like holder (mostly accessors) over the POI workbooks used to evaluate one
+ * formula.
+ *
+ * <p>Two backends exist: HSSF ("fast") and XSSF ("slow"). The original author reports HSSF as
+ * roughly five times faster, so it is used whenever the requested column count fits its limit
+ * and XSSF only serves wider rows. Each backend is created lazily on first use and then reused.
+ */
+public class FormulaPoi {
+  /** Highest column count served by the HSSF backend; wider rows go to XSSF. */
+  private static final int MAX_FAST_COLUMNS = 256;
+
+  private final Consumer<String> log;
+  private XSS slow;
+  private HSS fast;
+
+  /**
+   * @param log sink for the message emitted when the slow (XSSF) backend gets created
+   */
+  public FormulaPoi(final Consumer<String> log) {
+    this.log = log;
+  }
+
+  /**
+   * Returns the backend able to hold {@code columns} cells, creating it on first use.
+   *
+   * <p>Every call that selects the fast backend also counts towards its periodic workbook
+   * rebuild (see {@code HSS.incr()}).
+   *
+   * @param columns number of cells the caller needs in the working row
+   * @return the HSSF backed evaluator when {@code columns <= 256}, the XSSF backed one otherwise
+   */
+  public Evaluator evaluator(final int columns) {
+    if (columns <= MAX_FAST_COLUMNS) {
+      if (fast == null) {
+        fast = new HSS();
+      }
+      return fast.incr();
+    }
+    if (slow == null) {
+      slow = new XSS();
+    }
+    return slow;
+  }
+
+  /**
+   * Forces the fast backend, when it exists, to start over with a fresh workbook. Calling this
+   * after a chunk of rows is a way to bound the memory retained by the workbook. The slow
+   * backend is left untouched.
+   */
+  public void reset() {
+    if (fast != null) {
+      // pretend the interval elapsed so that incr() rebuilds the workbook right away
+      fast.currentCount = HSS.RESET_INTERVAL;
+      fast.incr();
+    }
+  }
+
+  /**
+   * Closes the current workbook of each backend that was created.
+   *
+   * @throws IOException if closing a workbook fails; the fast workbook is still closed when
+   *     closing the slow one fails
+   */
+  public void destroy() throws IOException {
+    try {
+      if (slow != null) {
+        slow.workBook.close();
+      }
+    } finally {
+      // must run even if the slow workbook failed to close, otherwise the fast one leaks
+      if (fast != null) {
+        fast.workBook.close();
+      }
+    }
+  }
+
+  /** Accessors to the POI objects backing one formula evaluation. */
+  public interface Evaluator {
+    /** @return the sheet owning the working row */
+    Sheet sheet();
+
+    /** @return the current working row */
+    Row row();
+
+    /**
+     * Replaces the working row.
+     *
+     * @param row the new working row; the implementations cast it to their own row type, so it
+     *     must have been created by the matching kind of sheet
+     */
+    void row(Row row);
+
+    /** @return the POI formula evaluator created for the backing workbook */
+    FormulaEvaluator evaluator();
+  }
+
+  /** XSSF backend; an inner (non static) class because it reports its creation through log. */
+  private class XSS implements Evaluator {
+    private final XSSFWorkbook workBook;
+    private final XSSFSheet workSheet;
+    private XSSFRow sheetRow;
+    private final XSSFFormulaEvaluator evaluator;
+
+    private XSS() {
+      log.accept("using xss implementation which is slow");
+      workBook = new XSSFWorkbook();
+      // formulas are parsed at evaluation time anyway, no need to validate them twice
+      workBook.setCellFormulaValidation(false);
+      workSheet = workBook.createSheet();
+      sheetRow = workSheet.createRow(0);
+      evaluator = workBook.getCreationHelper().createFormulaEvaluator();
+    }
+
+    @Override
+    public Sheet sheet() {
+      return workSheet;
+    }
+
+    @Override
+    public Row row() {
+      return sheetRow;
+    }
+
+    @Override
+    public void row(final Row row) {
+      sheetRow = (XSSFRow) row;
+    }
+
+    @Override
+    public FormulaEvaluator evaluator() {
+      return evaluator;
+    }
+  }
+
+  /**
+   * HSSF backend. It is fast compared to XSSF but it stores unicode strings in the internal
+   * workbook of the workbook, hence the workbook is thrown away and rebuilt from time to time.
+   */
+  private static class HSS implements Evaluator {
+    /**
+     * Value of the use counter at which {@link #incr()} rebuilds the workbook. Defaults to 10000
+     * and can be overridden with the integer system property named after this class (with '$'
+     * replaced by '.') followed by {@code .resetInterval}.
+     */
+    private static final int RESET_INTERVAL =
+        Integer.getInteger(HSS.class.getName().replace('$', '.') + ".resetInterval", 10_000);
+
+    private HSSFWorkbook workBook;
+    private HSSFSheet workSheet;
+    private HSSFRow sheetRow;
+    private HSSFFormulaEvaluator evaluator;
+
+    // starts at the threshold so that the very first incr() call builds the workbook
+    private int currentCount = RESET_INTERVAL;
+
+    private HSS() {
+      incr();
+    }
+
+    @Override
+    public Sheet sheet() {
+      return workSheet;
+    }
+
+    @Override
+    public Row row() {
+      return sheetRow;
+    }
+
+    @Override
+    public void row(final Row row) {
+      sheetRow = (HSSFRow) row;
+    }
+
+    @Override
+    public FormulaEvaluator evaluator() {
+      return evaluator;
+    }
+
+    /**
+     * Counts one use and, when the counter had reached {@link #RESET_INTERVAL}, swaps in a brand
+     * new workbook, sheet, row and evaluator and restarts the counter. The previous workbook is
+     * dropped without being closed.
+     *
+     * @return this instance, for chaining
+     */
+    public Evaluator incr() {
+      if (currentCount++ == RESET_INTERVAL) {
+        workBook = new HSSFWorkbook();
+        workSheet = workBook.createSheet();
+        sheetRow = workSheet.createRow(0);
+        evaluator = workBook.getCreationHelper().createFormulaEvaluator();
+        currentCount = 0;
+      }
+      return this;
+    }
+  }
+}
diff --git 
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaFieldsExtractor.java
 
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaFieldsExtractor.java
new file mode 100644
index 0000000000..7faaab5722
--- /dev/null
+++ 
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaFieldsExtractor.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.pipeline.transforms.formula.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Extracts the field references written as {@code [name]} from a formula expression. */
+public final class FormulaFieldsExtractor {
+  private FormulaFieldsExtractor() {
+    // static helper only
+  }
+
+  /**
+   * Lists, in order of appearance, the text found between each {@code '['} and the first
+   * {@code ']'} that follows it. Duplicates and empty names ({@code []}) are kept.
+   *
+   * <p>Plain index scanning is used on purpose: it avoids compiling a wildcard regex on every
+   * call for such a simple lookup.
+   *
+   * @param formula the formula text to scan, must not be null
+   * @return a new mutable list of field names, empty when the formula references no field
+   */
+  public static List<String> getFormulaFieldList(final String formula) {
+    final List<String> theFields = new ArrayList<>();
+
+    int from = 0;
+    while (from < formula.length()) {
+      final int fieldStart = formula.indexOf('[', from);
+      if (fieldStart < 0) {
+        break;
+      }
+      final int fieldEnd = formula.indexOf(']', fieldStart + 1);
+      if (fieldEnd < 0) {
+        // no closing bracket is left, so no later '[' can form a reference either
+        break;
+      }
+      theFields.add(formula.substring(fieldStart + 1, fieldEnd));
+      from = fieldEnd + 1;
+    }
+
+    return theFields;
+  }
+}
diff --git 
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParser.java
 
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParser.java
index 5e5ed93479..54dfdf6beb 100644
--- 
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParser.java
+++ 
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParser.java
@@ -17,52 +17,47 @@
 
 package org.apache.hop.pipeline.transforms.formula.util;
 
-import java.util.ArrayList;
+import static 
org.apache.hop.pipeline.transforms.formula.util.FormulaFieldsExtractor.getFormulaFieldList;
+
 import java.util.HashMap;
 import java.util.List;
 import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 import org.apache.hop.core.exception.HopValueException;
 import org.apache.hop.core.row.IRowMeta;
 import org.apache.hop.core.row.IValueMeta;
 import org.apache.hop.core.variables.IVariables;
 import org.apache.hop.pipeline.transforms.formula.FormulaMetaFunction;
+import org.apache.hop.pipeline.transforms.formula.FormulaPoi;
 import org.apache.poi.hssf.usermodel.HSSFRichTextString;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.CellValue;
-import org.apache.poi.ss.usermodel.FormulaEvaluator;
 import org.apache.poi.ss.usermodel.Row;
 
 public class FormulaParser {
 
   private FormulaMetaFunction formulaMetaFunction;
   private IRowMeta rowMeta;
-  private String[] fieldNames;
   private String formula;
   private List<String> formulaFieldList;
   private Object[] dataRow;
-  private Row sheetRow;
-  private FormulaEvaluator evaluator;
+  private FormulaPoi.Evaluator evaluator;
   private HashMap<String, String> replaceMap;
 
   public FormulaParser(
       FormulaMetaFunction formulaMetaFunction,
       IRowMeta rowMeta,
       Object[] dataRow,
-      Row sheetRow,
+      FormulaPoi poi,
       IVariables variables,
-      HashMap<String, String> replaceMap) {
+      HashMap<String, String> replaceMap,
+      List<String> formulaFieldList) {
     this.formulaMetaFunction = formulaMetaFunction;
     this.rowMeta = rowMeta;
     this.dataRow = dataRow;
-    this.sheetRow = sheetRow;
-    fieldNames = rowMeta.getFieldNames();
     this.replaceMap = replaceMap;
     formula = variables.resolve(formulaMetaFunction.getFormula());
-    evaluator = 
sheetRow.getSheet().getWorkbook().getCreationHelper().createFormulaEvaluator();
 
-    formulaFieldList = getFormulaFieldList(formula);
+    this.formulaFieldList = formulaFieldList;
 
     boolean getNewList = false;
     for (String formulaField : formulaFieldList) {
@@ -70,42 +65,52 @@ public class FormulaParser {
       Set<String> replaceKeys = replaceMap.keySet();
       if (replaceKeys.contains(formulaField)) {
         String realFieldName = replaceMap.get(formulaField);
-        formula = formula.replaceAll("\\[" + formulaField + "\\]", "\\[" + 
realFieldName + "\\]");
+        formula = formula.replace("[" + formulaField + "]", "[" + 
realFieldName + "]");
         getNewList = true;
       }
     }
 
     if (getNewList) {
-      formulaFieldList = getFormulaFieldList(formula);
+      this.formulaFieldList = getFormulaFieldList(formula);
     }
-  }
-
-  private List<String> getFormulaFieldList(String formula) {
-    List<String> theFields = new ArrayList<>();
-    Pattern regex = Pattern.compile("\\[(.*?)\\]");
-    Matcher regexMatcher = regex.matcher(formula);
-
-    while (regexMatcher.find()) {
-      theFields.add(regexMatcher.group(1));
-    }
-    return theFields;
+    this.evaluator = poi.evaluator(formulaFieldList.size() + 1);
+    this.evaluator.evaluator().clearAllCachedResultValues();
   }
 
   public CellValue getFormulaValue() throws HopValueException {
     String parsedFormula = formula;
     int fieldIndex = 65;
     int colIndex = 0;
+    Row row = evaluator.row();
+
+    // reset, something changed else reuse to leverage the formula parsing 
cache which does speed up
+    // a lot the runtime
+    if (row.getLastCellNum() > 0 && row.getLastCellNum() != 
formulaFieldList.size() + 1) {
+      if (evaluator.row() != null) {
+        evaluator.sheet().removeRow(evaluator.row());
+      }
+      row = evaluator.sheet().createRow(0);
+      evaluator.row(row);
+    }
+
     for (String formulaField : formulaFieldList) {
       char s = (char) fieldIndex;
-      Cell cell = sheetRow.createCell(colIndex);
+      final Cell cell;
+      if (row.getLastCellNum() <= colIndex) {
+        cell = row.createCell(colIndex);
+      } else {
+        cell = row.getCell(colIndex);
+      }
 
       int fieldPosition = rowMeta.indexOfValue(formulaField);
 
-      parsedFormula = parsedFormula.replaceAll("\\[" + formulaField + "\\]", s 
+ "1");
+      parsedFormula = parsedFormula.replace("[" + formulaField + "]", s + "1");
 
       IValueMeta fieldMeta = rowMeta.getValueMeta(fieldPosition);
       if (dataRow[fieldPosition] != null) {
-        if (fieldMeta.isBoolean()) {
+        if (fieldMeta.isString()) { // most common first to avoid a lot of 
"if" for nothing
+          cell.setCellValue(rowMeta.getString(dataRow, fieldPosition));
+        } else if (fieldMeta.isBoolean()) {
           cell.setCellValue(rowMeta.getBoolean(dataRow, fieldPosition));
         } else if (fieldMeta.isBigNumber()) {
           cell.setCellValue(new HSSFRichTextString(rowMeta.getString(dataRow, 
fieldPosition)));
@@ -115,8 +120,6 @@ public class FormulaParser {
           cell.setCellValue(rowMeta.getInteger(dataRow, fieldPosition));
         } else if (fieldMeta.isNumber()) {
           cell.setCellValue(rowMeta.getNumber(dataRow, fieldPosition));
-        } else if (fieldMeta.isString()) {
-          cell.setCellValue(rowMeta.getString(dataRow, fieldPosition));
         } else {
           cell.setCellValue(rowMeta.getString(dataRow, fieldPosition));
         }
@@ -132,9 +135,14 @@ public class FormulaParser {
       colIndex++;
     }
 
-    Cell formulaCell = sheetRow.createCell(colIndex);
-    formulaCell.setCellFormula(parsedFormula);
+    final Cell formulaCell;
+    if (row.getLastCellNum() <= colIndex) {
+      formulaCell = row.createCell(colIndex);
+      formulaCell.setCellFormula(parsedFormula);
+    } else { // already created/parsed
+      formulaCell = row.getCell(colIndex);
+    }
 
-    return evaluator.evaluate(formulaCell);
+    return evaluator.evaluator().evaluate(formulaCell);
   }
 }
diff --git 
a/plugins/transforms/formula/src/test/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParserTest.java
 
b/plugins/transforms/formula/src/test/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParserTest.java
new file mode 100644
index 0000000000..68a721a315
--- /dev/null
+++ 
b/plugins/transforms/formula/src/test/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParserTest.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.pipeline.transforms.formula.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.hop.core.BlockingRowSet;
+import org.apache.hop.core.exception.HopException;
+import org.apache.hop.core.exception.HopPluginException;
+import org.apache.hop.core.logging.HopLogStore;
+import org.apache.hop.core.logging.ILogChannel;
+import org.apache.hop.core.logging.ILogChannelFactory;
+import org.apache.hop.core.logging.ILogMessage;
+import org.apache.hop.core.logging.ILoggingObject;
+import org.apache.hop.core.logging.LogChannel;
+import org.apache.hop.core.logging.LogLevel;
+import org.apache.hop.core.plugins.PluginRegistry;
+import org.apache.hop.core.plugins.SupplementalPlugin;
+import org.apache.hop.core.row.IRowMeta;
+import org.apache.hop.core.row.IValueMeta;
+import org.apache.hop.core.row.RowMeta;
+import org.apache.hop.core.row.value.ValueMetaBigNumber;
+import org.apache.hop.core.row.value.ValueMetaPluginType;
+import org.apache.hop.core.row.value.ValueMetaString;
+import org.apache.hop.pipeline.Pipeline;
+import org.apache.hop.pipeline.PipelineMeta;
+import org.apache.hop.pipeline.config.PipelineRunConfiguration;
+import org.apache.hop.pipeline.engine.PipelineEngineCapabilities;
+import org.apache.hop.pipeline.transform.BaseTransformMeta;
+import org.apache.hop.pipeline.transform.RowAdapter;
+import org.apache.hop.pipeline.transform.TransformMeta;
+import org.apache.hop.pipeline.transforms.formula.Formula;
+import org.apache.hop.pipeline.transforms.formula.FormulaData;
+import org.apache.hop.pipeline.transforms.formula.FormulaMeta;
+import org.apache.hop.pipeline.transforms.formula.FormulaMetaFunction;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Runs the {@link Formula} transform over an in-memory row set and checks the value computed by
+ * each formula for every row.
+ */
+class FormulaParserTest {
+  // one-time setup: route logging to the console channel factory defined below and register a
+  // value meta plugin with id "2" that creates ValueMetaString instances
+  // NOTE(review): "2" presumably matches the string value type id - confirm
+  static {
+    HopLogStore.setLogChannelFactory(new ConsoleLogChannelFactory());
+
+    try {
+      final var plugin = new SupplementalPlugin(ValueMetaPluginType.class, 
"2");
+      plugin.addFactory(IValueMeta.class, ValueMetaString::new);
+      PluginRegistry.getInstance().registerPlugin(ValueMetaPluginType.class, 
plugin);
+    } catch (final HopPluginException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  // mainly a perf test since originally it was very slow
+  @Test
+  void formula() throws HopException {
+    // input layout: one big number field followed by three string fields
+    final var rowMeta = new RowMeta();
+    rowMeta.setValueMetaList(
+        List.of(
+            new ValueMetaBigNumber("FOO"),
+            new ValueMetaString("BAR"),
+            new ValueMetaString("DUMMY"),
+            new ValueMetaString("LAST")));
+
+    // row count, overridable with the "<test class name>.formula.maxSize" system property
+    final int maxSize =
+        Integer.getInteger(getClass().getName() + ".formula.maxSize", 
/*1_000_000*/ 1_000);
+    final var row = new BlockingRowSet(maxSize);
+    // FOO is the negated row index, the three string fields are constant
+    for (var i = 0; i < maxSize; i++) {
+      row.putRow(rowMeta, new Object[] {new BigDecimal(-i), "A", "B", "B"});
+    }
+    row.setDone();
+
+    // EVAL1 nests IF/ISBLANK over the string fields, EVAL2 formats the absolute value of FOO
+    final var meta = new FormulaMeta();
+    meta.setFormulas(
+        List.of(
+            new FormulaMetaFunction(
+                "EVAL1",
+                "IF(" + "[BAR] = \"A\", " + "IF(ISBLANK([DUMMY]), [LAST], 
[DUMMY])," + " [LAST])",
+                IValueMeta.TYPE_STRING,
+                -1,
+                -1,
+                "",
+                false),
+            new FormulaMetaFunction(
+                "EVAL2",
+                "TEXT(ABS([FOO]) , \"#.#######\")",
+                IValueMeta.TYPE_STRING,
+                -1,
+                -1,
+                "",
+                false)));
+
+    // minimal pipeline scaffolding needed to instantiate the transform
+    final var transformMeta = new BaseTransformMeta<>();
+    final var transformMetadata = new TransformMeta("1", "test", 
transformMeta);
+
+    final var pipelineMeta = new PipelineMeta();
+    pipelineMeta.addTransform(transformMetadata);
+
+    final var pipeline =
+        new Pipeline() {
+          private final PipelineEngineCapabilities pipelineEngineCapabilities =
+              new PipelineEngineCapabilities();
+          private final PipelineRunConfiguration pipelineRunConfiguration =
+              new PipelineRunConfiguration();
+
+          @Override
+          public PipelineEngineCapabilities getEngineCapabilities() {
+            return pipelineEngineCapabilities;
+          }
+
+          @Override
+          public PipelineRunConfiguration getPipelineRunConfiguration() {
+            return pipelineRunConfiguration;
+          }
+
+          @Override
+          public String getStatusDescription() {
+            return "";
+          }
+        };
+    pipeline.setRunning(true);
+
+    final var data = new FormulaData();
+    data.outputRowMeta = new RowMeta();
+
+    final var formula = new Formula(transformMetadata, meta, data, 1, 
pipelineMeta, pipeline);
+
+    // every written row must carry "B" at index 4 and the running row index, as text, at
+    // index 5
+    // NOTE(review): indexes 4 and 5 presumably are EVAL1 and EVAL2 appended after the four
+    // input fields - confirm against Formula.processRow()
+    final var counter = new AtomicInteger();
+    formula.addRowListener(
+        new RowAdapter() {
+          @Override
+          public void rowWrittenEvent(final IRowMeta rowMeta, final Object[] 
row) {
+            assertEquals("B", row[4]);
+            assertEquals(Integer.toString(counter.getAndIncrement()), row[5]);
+          }
+        });
+
+    // drive the transform by hand until processRow() reports there is nothing left to do
+    formula.init();
+    formula.startBundle();
+    formula.setInputRowSets(new ArrayList<>(List.of(row)));
+    while (formula.processRow())
+      ;
+    formula.finishBundle();
+    formula.setOutputDone();
+    pipeline.setRunning(false);
+
+    // the listener must have seen as many rows as were fed into the row set
+    assertEquals(maxSize, counter.get());
+  }
+
+  /** Log channel factory that always hands out one shared channel printing to stdout. */
+  private static class ConsoleLogChannelFactory implements ILogChannelFactory {
+    private final ILogChannel simpleLog =
+        new LogChannel("test") {
+          @Override
+          public void println(final ILogMessage logMessage, final LogLevel 
channelLogLevel) {
+            System.out.println("[" + channelLogLevel + "] " + logMessage);
+          }
+        };
+
+    @Override
+    public ILogChannel create(final Object subject) {
+      return simpleLog;
+    }
+
+    @Override
+    public ILogChannel create(final Object subject, final boolean 
gatheringMetrics) {
+      return simpleLog;
+    }
+
+    @Override
+    public ILogChannel create(final Object subject, final ILoggingObject 
parentObject) {
+      return simpleLog;
+    }
+
+    @Override
+    public ILogChannel create(
+        final Object subject, final ILoggingObject parentObject, final boolean 
gatheringMetrics) {
+      return simpleLog;
+    }
+  }
+}

Reply via email to