This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/master by this push:
new aa276ddf2d allow formula transform to use fields created in previous
transforms. fixes #3239
new d254db7e08 Merge pull request #3251 from bamaer/3239
aa276ddf2d is described below
commit aa276ddf2d463ece36b32c56e19b9236ee040b5a
Author: Bart Maertens <[email protected]>
AuthorDate: Sat Sep 23 12:00:47 2023 +0200
allow formula transform to use fields created in previous transforms. fixes
#3239
---
...mula-use-fields-created-in-previous-formula.hpl | 159 +++++++++++++++++++++
...mula-use-fields-created-in-previous-formula.csv | 2 +
...mula-use-fields-created-in-previous-formula.hwf | 79 ++++++++++
...ula-use-fields-created-in-previous-formula.json | 48 +++++++
...se-fields-created-in-previous-formula UNIT.json | 48 +++++++
.../hop/pipeline/transforms/formula/Formula.java | 27 ++--
6 files changed, 351 insertions(+), 12 deletions(-)
diff --git
a/integration-tests/transforms/0042-formula-use-fields-created-in-previous-formula.hpl
b/integration-tests/transforms/0042-formula-use-fields-created-in-previous-formula.hpl
new file mode 100644
index 0000000000..3d89bd944d
--- /dev/null
+++
b/integration-tests/transforms/0042-formula-use-fields-created-in-previous-formula.hpl
@@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<pipeline>
+ <info>
+ <name>0042-formula-use-fields-created-in-previous-formula</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <pipeline_version/>
+ <pipeline_type>Normal</pipeline_type>
+ <parameters>
+ </parameters>
+ <capture_transform_performance>N</capture_transform_performance>
+
<transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
+
<transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
+ <created_user>-</created_user>
+ <created_date>2023/09/23 11:31:43.891</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2023/09/23 11:31:43.891</modified_date>
+ </info>
+ <notepads>
+ </notepads>
+ <order>
+ <hop>
+ <from>re-use formula field</from>
+ <to>verify</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>foo, bar</from>
+ <to>re-use formula field</to>
+ <enabled>Y</enabled>
+ </hop>
+ </order>
+ <transform>
+ <name>foo, bar</name>
+ <type>DataGrid</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <data>
+ <line>
+ <item>foo</item>
+ <item>bar</item>
+ </line>
+ </data>
+ <fields>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <set_empty_string>N</set_empty_string>
+ <name>foo</name>
+ <type>String</type>
+ </field>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <set_empty_string>N</set_empty_string>
+ <name>bar</name>
+ <type>String</type>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>144</xloc>
+ <yloc>176</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>re-use formula field</name>
+ <type>Formula</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <formulas>
+ <formula>
+ <field_name>foobar</field_name>
+ <formula>[foo] & [bar]</formula>
+ <value_length>-1</value_length>
+ <value_precision>-1</value_precision>
+ <value_type>2</value_type>
+ </formula>
+ <formula>
+ <field_name>foobar_added</field_name>
+ <formula>[foobar] & "abcd"</formula>
+ <value_length>-1</value_length>
+ <value_precision>-1</value_precision>
+ <value_type>2</value_type>
+ </formula>
+ <formula>
+ <field_name>foobar_added_double</field_name>
+ <formula>[foobar_added] & " - " & [foobar_added]</formula>
+ <value_length>-1</value_length>
+ <value_precision>-1</value_precision>
+ <value_type>2</value_type>
+ </formula>
+ <formula>
+ <field_name>replace_foo</field_name>
+ <formula>[foobar_added_double]</formula>
+ <replace_field>foo</replace_field>
+ <value_length>-1</value_length>
+ <value_precision>-1</value_precision>
+ <value_type>0</value_type>
+ </formula>
+ </formulas>
+ <attributes/>
+ <GUI>
+ <xloc>272</xloc>
+ <yloc>176</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>verify</name>
+ <type>Dummy</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <attributes/>
+ <GUI>
+ <xloc>400</xloc>
+ <yloc>176</yloc>
+ </GUI>
+ </transform>
+ <transform_error_handling>
+ </transform_error_handling>
+ <attributes/>
+</pipeline>
diff --git
a/integration-tests/transforms/datasets/golden-formula-use-fields-created-in-previous-formula.csv
b/integration-tests/transforms/datasets/golden-formula-use-fields-created-in-previous-formula.csv
new file mode 100644
index 0000000000..b71ea22e71
--- /dev/null
+++
b/integration-tests/transforms/datasets/golden-formula-use-fields-created-in-previous-formula.csv
@@ -0,0 +1,2 @@
+foo,bar,foobar,foobar_added,foobar_added_double
+foobarabcd - foobarabcd,bar,foobar,foobarabcd,foobarabcd - foobarabcd
diff --git
a/integration-tests/transforms/main-0042-formula-use-fields-created-in-previous-formula.hwf
b/integration-tests/transforms/main-0042-formula-use-fields-created-in-previous-formula.hwf
new file mode 100644
index 0000000000..75ee7a6525
--- /dev/null
+++
b/integration-tests/transforms/main-0042-formula-use-fields-created-in-previous-formula.hwf
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<workflow>
+ <name>main-0042-formula-use-fields-created-in-previous-formula</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <workflow_version/>
+ <created_user>-</created_user>
+ <created_date>2022/04/29 10:42:16.470</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2022/04/29 10:42:16.470</modified_date>
+ <parameters>
+ </parameters>
+ <actions>
+ <action>
+ <name>Start</name>
+ <description/>
+ <type>SPECIAL</type>
+ <attributes/>
+ <DayOfMonth>1</DayOfMonth>
+ <hour>12</hour>
+ <intervalMinutes>60</intervalMinutes>
+ <intervalSeconds>0</intervalSeconds>
+ <minutes>0</minutes>
+ <repeat>N</repeat>
+ <schedulerType>0</schedulerType>
+ <weekDay>1</weekDay>
+ <parallel>N</parallel>
+ <xloc>50</xloc>
+ <yloc>50</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Run Formula Use Fields Created In Previous Formula Test</name>
+ <description/>
+ <type>RunPipelineTests</type>
+ <attributes/>
+ <test_names>
+ <test_name>
+ <name>0042-formula-use-fields-created-in-previous-formula UNIT</name>
+ </test_name>
+ </test_names>
+ <parallel>N</parallel>
+ <xloc>320</xloc>
+ <yloc>48</yloc>
+ <attributes_hac/>
+ </action>
+ </actions>
+ <hops>
+ <hop>
+ <from>Start</from>
+ <to>Run Formula Use Fields Created In Previous Formula Test</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>Y</unconditional>
+ </hop>
+ </hops>
+ <notepads>
+ </notepads>
+ <attributes/>
+</workflow>
diff --git
a/integration-tests/transforms/metadata/dataset/golden-formula-use-fields-created-in-previous-formula.json
b/integration-tests/transforms/metadata/dataset/golden-formula-use-fields-created-in-previous-formula.json
new file mode 100644
index 0000000000..2265875d40
--- /dev/null
+++
b/integration-tests/transforms/metadata/dataset/golden-formula-use-fields-created-in-previous-formula.json
@@ -0,0 +1,48 @@
+{
+ "base_filename": "golden-formula-use-fields-created-in-previous-formula.csv",
+ "name": "golden-formula-use-fields-created-in-previous-formula",
+ "description": "",
+ "dataset_fields": [
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_name": "foo",
+ "field_format": ""
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_name": "bar",
+ "field_format": ""
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_name": "foobar",
+ "field_format": ""
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_name": "foobar_added",
+ "field_format": ""
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_name": "foobar_added_double",
+ "field_format": ""
+ }
+ ],
+ "folder_name": ""
+}
\ No newline at end of file
diff --git
a/integration-tests/transforms/metadata/unit-test/0042-formula-use-fields-created-in-previous-formula
UNIT.json
b/integration-tests/transforms/metadata/unit-test/0042-formula-use-fields-created-in-previous-formula
UNIT.json
new file mode 100644
index 0000000000..073777c6a3
--- /dev/null
+++
b/integration-tests/transforms/metadata/unit-test/0042-formula-use-fields-created-in-previous-formula
UNIT.json
@@ -0,0 +1,48 @@
+{
+ "variableValues": [],
+ "database_replacements": [],
+ "autoOpening": true,
+ "basePath": "",
+ "golden_data_sets": [
+ {
+ "field_mappings": [
+ {
+ "transform_field": "foo",
+ "data_set_field": "foo"
+ },
+ {
+ "transform_field": "bar",
+ "data_set_field": "bar"
+ },
+ {
+ "transform_field": "foobar",
+ "data_set_field": "foobar"
+ },
+ {
+ "transform_field": "foobar_added",
+ "data_set_field": "foobar_added"
+ },
+ {
+ "transform_field": "foobar_added_double",
+ "data_set_field": "foobar_added_double"
+ }
+ ],
+ "field_order": [
+ "foo",
+ "bar",
+ "foobar",
+ "foobar_added",
+ "foobar_added_double"
+ ],
+ "data_set_name": "golden-formula-use-fields-created-in-previous-formula",
+ "transform_name": "verify"
+ }
+ ],
+ "input_data_sets": [],
+ "name": "0042-formula-use-fields-created-in-previous-formula UNIT",
+ "description": "",
+ "persist_filename": "",
+ "trans_test_tweaks": [],
+ "pipeline_filename":
"./0042-formula-use-fields-created-in-previous-formula.hpl",
+ "test_type": "UNIT_TEST"
+}
diff --git
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
index 852820efc9..285cfd5665 100644
---
a/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
+++
b/plugins/transforms/formula/src/main/java/org/apache/hop/pipeline/transforms/formula/Formula.java
@@ -78,32 +78,32 @@ public class Formula extends BaseTransform<FormulaMeta,
FormulaData> {
return false;
}
- int tempIndex = getInputRowMeta().size();
if (first) {
first = false;
-
data.outputRowMeta = getInputRowMeta().clone();
- meta.getFields(data.outputRowMeta, getTransformName(), null, null, this,
metadataProvider);
// Calculate replace indexes...
//
data.replaceIndex = new int[meta.getFormulas().size()];
- for (int i = 0; i < meta.getFormulas().size(); i++) {
- FormulaMetaFunction fn = meta.getFormulas().get(i);
+ for (int j = 0; j < meta.getFormulas().size(); j++) {
+ FormulaMetaFunction fn = meta.getFormulas().get(j);
if (!Utils.isEmpty(fn.getReplaceField())) {
- data.replaceIndex[i] =
getInputRowMeta().indexOfValue(fn.getReplaceField());
- if (data.replaceIndex[i] < 0) {
+ data.replaceIndex[j] =
data.outputRowMeta.indexOfValue(fn.getReplaceField());
+ if (data.replaceIndex[j] < 0) {
throw new HopException(
- "Unknown field specified to replace with a formula result: ["
- + fn.getReplaceField()
- + "]");
+ "Unknown field specified to replace with a formula result:
["
+ + fn.getReplaceField()
+ + "]");
}
} else {
- data.replaceIndex[i] = -1;
+ data.replaceIndex[j] = -1;
}
}
}
+ meta.getFields(data.outputRowMeta, getTransformName(), null, null, this,
metadataProvider);
+ int tempIndex = getInputRowMeta().size();
+
if (log.isRowLevel()) {
logRowlevel("Read row #" + getLinesRead() + " : " + Arrays.toString(r));
@@ -120,7 +120,7 @@ public class Formula extends BaseTransform<FormulaMeta,
FormulaData> {
for (int i = 0; i < meta.getFormulas().size(); i++) {
FormulaMetaFunction formula = meta.getFormulas().get(i);
- FormulaParser parser = new FormulaParser(formula, getInputRowMeta(), r,
sheetRow, variables);
+ FormulaParser parser = new FormulaParser(formula, data.outputRowMeta, r,
sheetRow, variables);
CellValue cellValue = parser.getFormulaValue();
CellType cellType = cellValue.getCellType();
@@ -177,11 +177,14 @@ public class Formula extends BaseTransform<FormulaMeta,
FormulaData> {
int realIndex = (data.replaceIndex[i] < 0) ? tempIndex++ :
data.replaceIndex[i];
+
+
outputRowData[realIndex] =
getReturnValue(outputValue, data.returnType[i], realIndex, formula);
}
putRow(data.outputRowMeta, outputRowData);
+
if (log.isRowLevel()) {
logRowlevel("Wrote row #" + getLinesWritten() + " : " +
Arrays.toString(r));
}