This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/main by this push:
new 35b84ae642 Minor improvement over formula plugin (#5689)
35b84ae642 is described below
commit 35b84ae6429bfa14fc6b703a9aaf0dff3e1ab51c
Author: Romain Manni-Bucau <[email protected]>
AuthorDate: Mon Sep 8 15:18:04 2025 +0200
Minor improvement over formula plugin (#5689)
---
.../main/java/org/apache/hop/core/row/RowMeta.java | 2 +-
.../java/org/apache/hop/core/util/StringUtil.java | 28 +--
.../hop/pipeline/transforms/formula/Formula.java | 64 ++++---
.../pipeline/transforms/formula/FormulaPoi.java | 162 +++++++++++++++++
.../formula/util/FormulaFieldsExtractor.java | 50 ++++++
.../transforms/formula/util/FormulaParser.java | 76 ++++----
.../transforms/formula/util/FormulaParserTest.java | 195 +++++++++++++++++++++
7 files changed, 505 insertions(+), 72 deletions(-)
diff --git a/core/src/main/java/org/apache/hop/core/row/RowMeta.java
b/core/src/main/java/org/apache/hop/core/row/RowMeta.java
index 35e517d6d4..27645fa2b6 100644
--- a/core/src/main/java/org/apache/hop/core/row/RowMeta.java
+++ b/core/src/main/java/org/apache/hop/core/row/RowMeta.java
@@ -1314,7 +1314,7 @@ public class RowMeta implements IRowMeta {
Integer index = mapping.get(name);
if (index != null) {
IValueMeta value = metas.get(index);
- if (!name.equalsIgnoreCase(value.getName())) {
+ if (!name.equals(value.getName())) { // case insensitive since we
lowercase
mapping.remove(name);
index = null;
}
diff --git a/core/src/main/java/org/apache/hop/core/util/StringUtil.java
b/core/src/main/java/org/apache/hop/core/util/StringUtil.java
index 920f27272c..424eedc58b 100644
--- a/core/src/main/java/org/apache/hop/core/util/StringUtil.java
+++ b/core/src/main/java/org/apache/hop/core/util/StringUtil.java
@@ -20,7 +20,6 @@ package org.apache.hop.core.util;
import java.text.DateFormat;
import java.text.Normalizer;
import java.text.SimpleDateFormat;
-import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
@@ -151,12 +150,16 @@ public class StringUtil {
return null;
}
- StringBuilder buffer = new StringBuilder();
+ // search for opening string
+ int i = aString.indexOf(open);
+ if (i < 0) // no match, no need to instantiate the string builder
+ {
+ return aString;
+ }
String rest = aString;
- // search for opening string
- int i = rest.indexOf(open);
+ StringBuilder buffer = new StringBuilder();
while (i > -1) {
int j = rest.indexOf(close, i + open.length());
// search for closing string
@@ -251,20 +254,17 @@ public class StringUtil {
* properties
*
* @param aString the string on which to apply the substitution.
- * @param systemProperties the system properties to use
+ * @param systemProperties the system properties to use (ensure read is
thread safe in calling
+ * context)
* @return the string with the substitution applied.
*/
public static final synchronized String environmentSubstitute(
String aString, Map<String, String> systemProperties) {
- Map<String, String> sysMap = new HashMap<>();
- synchronized (sysMap) {
- sysMap.putAll(Collections.synchronizedMap(systemProperties));
-
- aString = substituteWindows(aString, sysMap);
- aString = substituteUnix(aString, sysMap);
- aString = substituteHex(aString);
- return aString;
- }
+ // system properties are thread safe normally in our usages
+ aString = substituteWindows(aString, systemProperties);
+ aString = substituteUnix(aString, systemProperties);
+ aString = substituteHex(aString);
+ return aString;
}
/**
diff --git
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
index cfafc94670..925084b7ca 100644
---
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
+++
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
@@ -21,6 +21,8 @@ import java.io.IOException;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
+import java.util.stream.IntStream;
import org.apache.hop.core.exception.HopException;
import org.apache.hop.core.exception.HopTransformException;
import org.apache.hop.core.row.IValueMeta;
@@ -31,42 +33,45 @@ import org.apache.hop.pipeline.Pipeline;
import org.apache.hop.pipeline.PipelineMeta;
import org.apache.hop.pipeline.transform.BaseTransform;
import org.apache.hop.pipeline.transform.TransformMeta;
+import org.apache.hop.pipeline.transforms.formula.util.FormulaFieldsExtractor;
import org.apache.hop.pipeline.transforms.formula.util.FormulaParser;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.CellValue;
import org.apache.poi.ss.usermodel.DateUtil;
-import org.apache.poi.ss.usermodel.Row;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class Formula extends BaseTransform<FormulaMeta, FormulaData> {
- private XSSFWorkbook workBook;
- private XSSFSheet workSheet;
- private Row sheetRow;
- private HashMap<String, String> replaceMap;
+ private FormulaPoi[] poi;
+ private List<String>[] formulaFieldLists;
+ private final HashMap<String, String> replaceMap = new HashMap<>();
@Override
public boolean init() {
-
- workBook = new XSSFWorkbook();
- workSheet = workBook.createSheet();
- sheetRow = workSheet.createRow(0);
- replaceMap = new HashMap<>();
-
return true;
}
@Override
public void dispose() {
- try {
- workBook.close();
- } catch (IOException e) {
- logError("Unable to close temporary workbook", e);
+ if (poi != null) {
+ for (final var it : poi) {
+ try {
+ it.destroy();
+ } catch (IOException e) {
+ logError("Unable to close temporary workbook", e);
+ }
+ }
}
super.dispose();
}
+ /**
+  * Resets every per-formula POI holder once a batch is complete.
+  *
+  * <p>{@code poi} is not assigned by {@link #init()} and {@link #dispose()} already treats it as
+  * possibly {@code null}, so this method tolerates being called before the holders exist.
+  *
+  * @throws HopException if the parent implementation fails
+  */
+ @Override
+ public void batchComplete() throws HopException {
+   super.batchComplete();
+   if (poi == null) {
+     return; // no holder was allocated yet, nothing to reset
+   }
+   for (final var it : poi) {
+     it.reset();
+   }
+ }
+
@Override
public boolean processRow() throws HopException {
@@ -112,6 +117,18 @@ public class Formula extends BaseTransform<FormulaMeta,
FormulaData> {
data.replaceIndex[j] = -1;
}
}
+
+ // create one backing row per formula
+ poi =
+ IntStream.range(0, meta.getFormulas().size())
+ .mapToObj(it -> new FormulaPoi(this::logDebug))
+ .toArray(FormulaPoi[]::new);
+ // compute only once for all rows the default field list
+ formulaFieldLists =
+ meta.getFormulas().stream()
+ .map(FormulaMetaFunction::getFormula)
+ .map(FormulaFieldsExtractor::getFormulaFieldList)
+ .toArray(List[]::new);
}
int tempIndex = getInputRowMeta().size();
@@ -120,11 +137,6 @@ public class Formula extends BaseTransform<FormulaMeta,
FormulaData> {
logRowlevel("Read row #" + getLinesRead() + " : " + Arrays.toString(r));
}
- if (sheetRow != null) {
- workSheet.removeRow(sheetRow);
- }
- sheetRow = workSheet.createRow(0);
-
Object[] outputRowData = RowDataUtil.resizeArray(r,
data.outputRowMeta.size());
Object outputValue = null;
@@ -133,7 +145,13 @@ public class Formula extends BaseTransform<FormulaMeta,
FormulaData> {
FormulaMetaFunction formula = meta.getFormulas().get(i);
FormulaParser parser =
new FormulaParser(
- formula, data.outputRowMeta, outputRowData, sheetRow, variables,
replaceMap);
+ formula,
+ data.outputRowMeta,
+ outputRowData,
+ poi[i],
+ variables,
+ replaceMap,
+ formulaFieldLists[i]);
try {
CellValue cellValue = parser.getFormulaValue();
CellType cellType = cellValue.getCellType();
diff --git
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/FormulaPoi.java
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/FormulaPoi.java
new file mode 100644
index 0000000000..d810c5ae88
--- /dev/null
+++
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/FormulaPoi.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.pipeline.transforms.formula;
+
+import java.io.IOException;
+import java.util.function.Consumer;
+import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.FormulaEvaluator;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFFormulaEvaluator;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+// Very thin "factory abstraction" (mainly accessors) on top of the POI HSSF/XSSF workbooks.
+// Per the original author's measurements HSSF is about 5x faster than XSSF, so it is used
+// whenever possible; it is however limited in number of columns, hence the XSSF fallback.
+public class FormulaPoi {
+ // sink for the message emitted when the XSSF implementation gets instantiated
+ private final Consumer<String> log;
+ // both holders are created lazily by evaluator(int)
+ private XSS slow;
+ private HSS fast;
+
+ public FormulaPoi(final Consumer<String> log) {
+ this.log = log;
+ }
+
+ /**
+ * Returns the workbook holder to use for a row needing {@code columns} cells: the HSSF one up to
+ * 256 columns (created on first use, and {@code incr()} is called on every request), the XSSF
+ * one (created on first use) above that.
+ */
+ public Evaluator evaluator(final int columns) {
+ return columns <= 256
+ ? (fast == null ? fast = new HSS() : fast).incr()
+ : (slow == null ? slow = new XSS() : slow);
+ }
+
+ /**
+ * Forces the HSSF holder, if one exists, to rebuild its workbook: the counter is set to the
+ * reset threshold so that the following {@code incr()} call takes the rebuild branch.
+ */
+ public void reset() {
+ if (fast != null) { // force a reset after a chunk so memory can be controlled this way too
+ fast.currentCount = HSS.RESET_INTERVAL;
+ fast.incr();
+ }
+ }
+
+ /**
+ * Closes the workbook currently held by each existing holder.
+ *
+ * <p>NOTE(review): HSSF workbooks previously replaced by {@code incr()} are not closed here;
+ * confirm that dropping them without {@code close()} is acceptable.
+ *
+ * @throws IOException if closing a workbook fails
+ */
+ public void destroy() throws IOException {
+ if (slow != null) {
+ slow.workBook.close();
+ }
+ if (fast != null) {
+ fast.workBook.close();
+ }
+ }
+
+ /** Accessors over the sheet, the current row and the POI formula evaluator of one workbook. */
+ public interface Evaluator {
+ Sheet sheet();
+
+ Row row();
+
+ // replaces the row returned by row(); the implementations cast it to their own row type
+ void row(Row row);
+
+ FormulaEvaluator evaluator();
+ }
+
+ // XSSF-backed holder; not static because its constructor reports through the enclosing log
+ private class XSS implements Evaluator {
+ private final XSSFWorkbook workBook;
+ private final XSSFSheet workSheet;
+ private XSSFRow sheetRow;
+ private final XSSFFormulaEvaluator evaluator;
+
+ // builds the workbook, one sheet, row 0 and the evaluator once; they are never rebuilt
+ private XSS() {
+ log.accept("using xss implementation which is slow");
+ workBook = new XSSFWorkbook();
+ workBook.setCellFormulaValidation(false); // we parse it anyway so no need to do it twice
+ workSheet = workBook.createSheet();
+ sheetRow = workSheet.createRow(0);
+ evaluator =
sheetRow.getSheet().getWorkbook().getCreationHelper().createFormulaEvaluator();
+ }
+
+ @Override
+ public Sheet sheet() {
+ return workSheet;
+ }
+
+ @Override
+ public Row row() {
+ return sheetRow;
+ }
+
+ @Override
+ public void row(final Row row) {
+ sheetRow = (XSSFRow) row;
+ }
+
+ @Override
+ public FormulaEvaluator evaluator() {
+ return evaluator;
+ }
+ }
+
+ // HSSF is fast compared to XSSF but, per the original author, it stores unicode strings
+ // in the internal workbook of the workbook,
+ // so the workbook must be rebuilt from time to time
+ private static class HSS implements Evaluator {
+ // number of incr() calls between two rebuilds; read from the system property named after this
+ // class (with '$' replaced by '.') followed by ".resetInterval", 10_000 when unset
+ private static final int RESET_INTERVAL =
+ Integer.getInteger(HSS.class.getName().replace('$', '.') +
".resetInterval", 10_000);
+
+ private HSSFWorkbook workBook;
+ private HSSFSheet workSheet;
+ private HSSFRow sheetRow;
+ private HSSFFormulaEvaluator evaluator;
+
+ // starts at the threshold so that the incr() issued by the constructor builds the workbook
+ private int currentCount = RESET_INTERVAL;
+
+ private HSS() {
+ incr();
+ }
+
+ @Override
+ public Sheet sheet() {
+ return workSheet;
+ }
+
+ @Override
+ public Row row() {
+ return sheetRow;
+ }
+
+ @Override
+ public void row(final Row row) {
+ sheetRow = (HSSFRow) row;
+ }
+
+ @Override
+ public FormulaEvaluator evaluator() {
+ return evaluator;
+ }
+
+ /**
+ * Counts one usage; when the counter (read before being incremented) equals
+ * {@code RESET_INTERVAL}, a new workbook, sheet, row 0 and evaluator replace the current ones
+ * and the counter restarts at 0.
+ *
+ * @return this holder
+ */
+ public Evaluator incr() {
+ if (currentCount++ == RESET_INTERVAL) {
+ workBook = new HSSFWorkbook();
+ workSheet = workBook.createSheet();
+ sheetRow = workSheet.createRow(0);
+ evaluator =
sheetRow.getSheet().getWorkbook().getCreationHelper().createFormulaEvaluator();
+ currentCount = 0;
+ }
+ return this;
+ }
+ }
+}
diff --git
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaFieldsExtractor.java
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaFieldsExtractor.java
new file mode 100644
index 0000000000..7faaab5722
--- /dev/null
+++
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaFieldsExtractor.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.pipeline.transforms.formula.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public final class FormulaFieldsExtractor {
+  private FormulaFieldsExtractor() {
+    // utility class, not meant to be instantiated
+  }
+
+  /**
+   * Extracts the field names referenced as {@code [name]} in a formula, in order of appearance
+   * (duplicates are kept). Plain {@code indexOf} scanning is used instead of a regular expression
+   * so nothing has to be compiled on each call.
+   *
+   * @param formula the formula text, may be {@code null}
+   * @return a mutable list holding the text found between each {@code [} and the next {@code ]};
+   *     empty when the formula is {@code null} or contains no complete {@code [...]} pair
+   */
+  public static List<String> getFormulaFieldList(final String formula) {
+    final List<String> fields = new ArrayList<>();
+    if (formula == null) {
+      return fields;
+    }
+
+    int open = formula.indexOf('[');
+    while (open >= 0) {
+      final int close = formula.indexOf(']', open + 1);
+      if (close < 0) {
+        break; // no closing bracket is left, so no later '[' can start a field either
+      }
+      fields.add(formula.substring(open + 1, close));
+      open = formula.indexOf('[', close + 1);
+    }
+    return fields;
+  }
+}
diff --git
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParser.java
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParser.java
index 5e5ed93479..54dfdf6beb 100644
---
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParser.java
+++
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParser.java
@@ -17,52 +17,47 @@
package org.apache.hop.pipeline.transforms.formula.util;
-import java.util.ArrayList;
+import static
org.apache.hop.pipeline.transforms.formula.util.FormulaFieldsExtractor.getFormulaFieldList;
+
import java.util.HashMap;
import java.util.List;
import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import org.apache.hop.core.exception.HopValueException;
import org.apache.hop.core.row.IRowMeta;
import org.apache.hop.core.row.IValueMeta;
import org.apache.hop.core.variables.IVariables;
import org.apache.hop.pipeline.transforms.formula.FormulaMetaFunction;
+import org.apache.hop.pipeline.transforms.formula.FormulaPoi;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellValue;
-import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
public class FormulaParser {
private FormulaMetaFunction formulaMetaFunction;
private IRowMeta rowMeta;
- private String[] fieldNames;
private String formula;
private List<String> formulaFieldList;
private Object[] dataRow;
- private Row sheetRow;
- private FormulaEvaluator evaluator;
+ private FormulaPoi.Evaluator evaluator;
private HashMap<String, String> replaceMap;
public FormulaParser(
FormulaMetaFunction formulaMetaFunction,
IRowMeta rowMeta,
Object[] dataRow,
- Row sheetRow,
+ FormulaPoi poi,
IVariables variables,
- HashMap<String, String> replaceMap) {
+ HashMap<String, String> replaceMap,
+ List<String> formulaFieldList) {
this.formulaMetaFunction = formulaMetaFunction;
this.rowMeta = rowMeta;
this.dataRow = dataRow;
- this.sheetRow = sheetRow;
- fieldNames = rowMeta.getFieldNames();
this.replaceMap = replaceMap;
formula = variables.resolve(formulaMetaFunction.getFormula());
- evaluator =
sheetRow.getSheet().getWorkbook().getCreationHelper().createFormulaEvaluator();
- formulaFieldList = getFormulaFieldList(formula);
+ this.formulaFieldList = formulaFieldList;
boolean getNewList = false;
for (String formulaField : formulaFieldList) {
@@ -70,42 +65,52 @@ public class FormulaParser {
Set<String> replaceKeys = replaceMap.keySet();
if (replaceKeys.contains(formulaField)) {
String realFieldName = replaceMap.get(formulaField);
- formula = formula.replaceAll("\\[" + formulaField + "\\]", "\\[" +
realFieldName + "\\]");
+ formula = formula.replace("[" + formulaField + "]", "[" +
realFieldName + "]");
getNewList = true;
}
}
if (getNewList) {
- formulaFieldList = getFormulaFieldList(formula);
+ this.formulaFieldList = getFormulaFieldList(formula);
}
- }
-
- private List<String> getFormulaFieldList(String formula) {
- List<String> theFields = new ArrayList<>();
- Pattern regex = Pattern.compile("\\[(.*?)\\]");
- Matcher regexMatcher = regex.matcher(formula);
-
- while (regexMatcher.find()) {
- theFields.add(regexMatcher.group(1));
- }
- return theFields;
+ this.evaluator = poi.evaluator(formulaFieldList.size() + 1);
+ this.evaluator.evaluator().clearAllCachedResultValues();
}
public CellValue getFormulaValue() throws HopValueException {
String parsedFormula = formula;
int fieldIndex = 65;
int colIndex = 0;
+ Row row = evaluator.row();
+
+ // reset, something changed else reuse to leverage the formula parsing
cache which does speed up
+ // a lot the runtime
+ if (row.getLastCellNum() > 0 && row.getLastCellNum() !=
formulaFieldList.size() + 1) {
+ if (evaluator.row() != null) {
+ evaluator.sheet().removeRow(evaluator.row());
+ }
+ row = evaluator.sheet().createRow(0);
+ evaluator.row(row);
+ }
+
for (String formulaField : formulaFieldList) {
char s = (char) fieldIndex;
- Cell cell = sheetRow.createCell(colIndex);
+ final Cell cell;
+ if (row.getLastCellNum() <= colIndex) {
+ cell = row.createCell(colIndex);
+ } else {
+ cell = row.getCell(colIndex);
+ }
int fieldPosition = rowMeta.indexOfValue(formulaField);
- parsedFormula = parsedFormula.replaceAll("\\[" + formulaField + "\\]", s
+ "1");
+ parsedFormula = parsedFormula.replace("[" + formulaField + "]", s + "1");
IValueMeta fieldMeta = rowMeta.getValueMeta(fieldPosition);
if (dataRow[fieldPosition] != null) {
- if (fieldMeta.isBoolean()) {
+ if (fieldMeta.isString()) { // most common first to avoid a lot of
"if" for nothing
+ cell.setCellValue(rowMeta.getString(dataRow, fieldPosition));
+ } else if (fieldMeta.isBoolean()) {
cell.setCellValue(rowMeta.getBoolean(dataRow, fieldPosition));
} else if (fieldMeta.isBigNumber()) {
cell.setCellValue(new HSSFRichTextString(rowMeta.getString(dataRow,
fieldPosition)));
@@ -115,8 +120,6 @@ public class FormulaParser {
cell.setCellValue(rowMeta.getInteger(dataRow, fieldPosition));
} else if (fieldMeta.isNumber()) {
cell.setCellValue(rowMeta.getNumber(dataRow, fieldPosition));
- } else if (fieldMeta.isString()) {
- cell.setCellValue(rowMeta.getString(dataRow, fieldPosition));
} else {
cell.setCellValue(rowMeta.getString(dataRow, fieldPosition));
}
@@ -132,9 +135,14 @@ public class FormulaParser {
colIndex++;
}
- Cell formulaCell = sheetRow.createCell(colIndex);
- formulaCell.setCellFormula(parsedFormula);
+ final Cell formulaCell;
+ if (row.getLastCellNum() <= colIndex) {
+ formulaCell = row.createCell(colIndex);
+ formulaCell.setCellFormula(parsedFormula);
+ } else { // already created/parsed
+ formulaCell = row.getCell(colIndex);
+ }
- return evaluator.evaluate(formulaCell);
+ return evaluator.evaluator().evaluate(formulaCell);
}
}
diff --git
a/plugins/transforms/formula/src/test/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParserTest.java
b/plugins/transforms/formula/src/test/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParserTest.java
new file mode 100644
index 0000000000..68a721a315
--- /dev/null
+++
b/plugins/transforms/formula/src/test/java/org/apache/hop/pipeline/transforms/formula/util/FormulaParserTest.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.pipeline.transforms.formula.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.hop.core.BlockingRowSet;
+import org.apache.hop.core.exception.HopException;
+import org.apache.hop.core.exception.HopPluginException;
+import org.apache.hop.core.logging.HopLogStore;
+import org.apache.hop.core.logging.ILogChannel;
+import org.apache.hop.core.logging.ILogChannelFactory;
+import org.apache.hop.core.logging.ILogMessage;
+import org.apache.hop.core.logging.ILoggingObject;
+import org.apache.hop.core.logging.LogChannel;
+import org.apache.hop.core.logging.LogLevel;
+import org.apache.hop.core.plugins.PluginRegistry;
+import org.apache.hop.core.plugins.SupplementalPlugin;
+import org.apache.hop.core.row.IRowMeta;
+import org.apache.hop.core.row.IValueMeta;
+import org.apache.hop.core.row.RowMeta;
+import org.apache.hop.core.row.value.ValueMetaBigNumber;
+import org.apache.hop.core.row.value.ValueMetaPluginType;
+import org.apache.hop.core.row.value.ValueMetaString;
+import org.apache.hop.pipeline.Pipeline;
+import org.apache.hop.pipeline.PipelineMeta;
+import org.apache.hop.pipeline.config.PipelineRunConfiguration;
+import org.apache.hop.pipeline.engine.PipelineEngineCapabilities;
+import org.apache.hop.pipeline.transform.BaseTransformMeta;
+import org.apache.hop.pipeline.transform.RowAdapter;
+import org.apache.hop.pipeline.transform.TransformMeta;
+import org.apache.hop.pipeline.transforms.formula.Formula;
+import org.apache.hop.pipeline.transforms.formula.FormulaData;
+import org.apache.hop.pipeline.transforms.formula.FormulaMeta;
+import org.apache.hop.pipeline.transforms.formula.FormulaMetaFunction;
+import org.junit.jupiter.api.Test;
+
+// Drives a Formula transform over an in-memory row set with two formulas and checks every row
+// reported to the row listener.
+class FormulaParserTest {
+ static {
+ // route Hop logging to stdout through the factory defined at the bottom of this class
+ HopLogStore.setLogChannelFactory(new ConsoleLogChannelFactory());
+
+ try {
+ // register a value meta plugin with id "2" whose factory creates ValueMetaString instances
+ final var plugin = new SupplementalPlugin(ValueMetaPluginType.class,
"2");
+ plugin.addFactory(IValueMeta.class, ValueMetaString::new);
+ PluginRegistry.getInstance().registerPlugin(ValueMetaPluginType.class,
plugin);
+ } catch (final HopPluginException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ // mainly a perf test since originally it was very slow
+ @Test
+ void formula() throws HopException {
+ // input layout: one big number field followed by three string fields
+ final var rowMeta = new RowMeta();
+ rowMeta.setValueMetaList(
+ List.of(
+ new ValueMetaBigNumber("FOO"),
+ new ValueMetaString("BAR"),
+ new ValueMetaString("DUMMY"),
+ new ValueMetaString("LAST")));
+
+ // row count comes from the system property "<test class name>.formula.maxSize", 1_000 if unset
+ final int maxSize =
+ Integer.getInteger(getClass().getName() + ".formula.maxSize",
/*1_000_000*/ 1_000);
+ final var row = new BlockingRowSet(maxSize);
+ // FOO is 0, -1, -2, ... so ABS([FOO]) is expected to follow the listener counter below
+ for (var i = 0; i < maxSize; i++) {
+ row.putRow(rowMeta, new Object[] {new BigDecimal(-i), "A", "B", "B"});
+ }
+ row.setDone();
+
+ // two string formulas: EVAL1 picks between DUMMY and LAST, EVAL2 formats the absolute FOO
+ final var meta = new FormulaMeta();
+ meta.setFormulas(
+ List.of(
+ new FormulaMetaFunction(
+ "EVAL1",
+ "IF(" + "[BAR] = \"A\", " + "IF(ISBLANK([DUMMY]), [LAST],
[DUMMY])," + " [LAST])",
+ IValueMeta.TYPE_STRING,
+ -1,
+ -1,
+ "",
+ false),
+ new FormulaMetaFunction(
+ "EVAL2",
+ "TEXT(ABS([FOO]) , \"#.#######\")",
+ IValueMeta.TYPE_STRING,
+ -1,
+ -1,
+ "",
+ false)));
+
+ final var transformMeta = new BaseTransformMeta<>();
+ final var transformMetadata = new TransformMeta("1", "test",
transformMeta);
+
+ final var pipelineMeta = new PipelineMeta();
+ pipelineMeta.addTransform(transformMetadata);
+
+ // minimal Pipeline subclass overriding only the three methods below
+ final var pipeline =
+ new Pipeline() {
+ private final PipelineEngineCapabilities pipelineEngineCapabilities =
+ new PipelineEngineCapabilities();
+ private final PipelineRunConfiguration pipelineRunConfiguration =
+ new PipelineRunConfiguration();
+
+ @Override
+ public PipelineEngineCapabilities getEngineCapabilities() {
+ return pipelineEngineCapabilities;
+ }
+
+ @Override
+ public PipelineRunConfiguration getPipelineRunConfiguration() {
+ return pipelineRunConfiguration;
+ }
+
+ @Override
+ public String getStatusDescription() {
+ return "";
+ }
+ };
+ pipeline.setRunning(true);
+
+ final var data = new FormulaData();
+ data.outputRowMeta = new RowMeta();
+
+ final var formula = new Formula(transformMetadata, meta, data, 1,
pipelineMeta, pipeline);
+
+ // indexes 4 and 5 are presumably the EVAL1/EVAL2 results appended after the four input
+ // fields - TODO confirm against Formula.processRow
+ final var counter = new AtomicInteger();
+ formula.addRowListener(
+ new RowAdapter() {
+ @Override
+ public void rowWrittenEvent(final IRowMeta rowMeta, final Object[]
row) {
+ assertEquals("B", row[4]);
+ assertEquals(Integer.toString(counter.getAndIncrement()), row[5]);
+ }
+ });
+
+ // run the transform by hand until processRow() reports there is nothing left to do
+ formula.init();
+ formula.startBundle();
+ formula.setInputRowSets(new ArrayList<>(List.of(row)));
+ while (formula.processRow())
+ ;
+ formula.finishBundle();
+ formula.setOutputDone();
+ pipeline.setRunning(false);
+
+ // the listener must have been notified exactly once per input row
+ assertEquals(maxSize, counter.get());
+ }
+
+ // log channel factory handing out one shared channel that prints every message to stdout
+ private static class ConsoleLogChannelFactory implements ILogChannelFactory {
+ private final ILogChannel simpleLog =
+ new LogChannel("test") {
+ @Override
+ public void println(final ILogMessage logMessage, final LogLevel
channelLogLevel) {
+ System.out.println("[" + channelLogLevel + "] " + logMessage);
+ }
+ };
+
+ @Override
+ public ILogChannel create(final Object subject) {
+ return simpleLog;
+ }
+
+ @Override
+ public ILogChannel create(final Object subject, final boolean
gatheringMetrics) {
+ return simpleLog;
+ }
+
+ @Override
+ public ILogChannel create(final Object subject, final ILoggingObject
parentObject) {
+ return simpleLog;
+ }
+
+ @Override
+ public ILogChannel create(
+ final Object subject, final ILoggingObject parentObject, final boolean
gatheringMetrics) {
+ return simpleLog;
+ }
+ }
+}