This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/main by this push:
new 865a1822b5 Fix: Incorrect Field Name Updates After Rename in Fuzzy match (#6137)
865a1822b5 is described below
commit 865a1822b56d9cf428bb238d25b8fff41945ee37
Author: lance <[email protected]>
AuthorDate: Tue Dec 9 22:54:51 2025 +0800
Fix: Incorrect Field Name Updates After Rename in Fuzzy match (#6137)
Signed-off-by: lance <[email protected]>
---
.../transforms/fuzzymatch/FuzzyMatchMeta.java | 73 +++---
.../main/samples/transforms/fuzzy-match-rename.hpl | 278 +++++++++++++++++++++
.../transforms/fuzzymatch/FuzzyMatchMetaTest.java | 56 ++++-
3 files changed, 365 insertions(+), 42 deletions(-)
diff --git
a/plugins/transforms/fuzzymatch/src/main/java/org/apache/hop/pipeline/transforms/fuzzymatch/FuzzyMatchMeta.java
b/plugins/transforms/fuzzymatch/src/main/java/org/apache/hop/pipeline/transforms/fuzzymatch/FuzzyMatchMeta.java
index c51ce24240..62a3e92908 100644
---
a/plugins/transforms/fuzzymatch/src/main/java/org/apache/hop/pipeline/transforms/fuzzymatch/FuzzyMatchMeta.java
+++
b/plugins/transforms/fuzzymatch/src/main/java/org/apache/hop/pipeline/transforms/fuzzymatch/FuzzyMatchMeta.java
@@ -25,6 +25,9 @@ import static
org.apache.hop.pipeline.transforms.fuzzymatch.FuzzyMatchMeta.Algor
import java.util.ArrayList;
import java.util.List;
+import java.util.Objects;
+import lombok.Getter;
+import lombok.Setter;
import org.apache.commons.lang.StringUtils;
import org.apache.hop.core.CheckResult;
import org.apache.hop.core.Const;
@@ -36,6 +39,7 @@ import org.apache.hop.core.row.IValueMeta;
import org.apache.hop.core.row.value.ValueMetaInteger;
import org.apache.hop.core.row.value.ValueMetaNumber;
import org.apache.hop.core.row.value.ValueMetaString;
+import org.apache.hop.core.util.Utils;
import org.apache.hop.core.variables.IVariables;
import org.apache.hop.i18n.BaseMessages;
import org.apache.hop.metadata.api.HopMetadataProperty;
@@ -204,9 +208,9 @@ public class FuzzyMatchMeta extends
BaseTransformMeta<FuzzyMatch, FuzzyMatchData
// Configuration error/missing resources...
v.setName(lookupValue.getName());
v.setOrigin(name);
- v.setStorageType(
- IValueMeta.STORAGE_TYPE_NORMAL); // Only normal storage goes
into the cache
- inputRowMeta.addValueMeta(v);
+ // Only normal storage goes into the cache
+ v.setStorageType(IValueMeta.STORAGE_TYPE_NORMAL);
+ replaceValueMeta(inputRowMeta, lookupValue, v);
} else {
throw new HopTransformException(
BaseMessages.getString(
@@ -219,12 +223,35 @@ public class FuzzyMatchMeta extends
BaseTransformMeta<FuzzyMatch, FuzzyMatchData
for (FMLookupValue lookupValue : lookupValues) {
v = new ValueMetaString(lookupValue.getName());
v.setOrigin(name);
- inputRowMeta.addValueMeta(v);
+ replaceValueMeta(inputRowMeta, lookupValue, v);
}
}
}
}
+ /**
+ * Adds a value meta to the given row meta, or overwrites the entry that already has its name.
+ *
+ * <p>The position is looked up using the name that newMeta carries on entry, that is, before
+ * any rename is applied. When lookupValue is non-null and has a non-empty rename, newMeta is
+ * then renamed in place. If no field with the original name was found, newMeta is appended;
+ * otherwise it replaces the value meta at the index that was found.
+ *
+ * @param inputRowMeta The row meta that is modified in place.
+ * @param lookupValue The lookup configuration that may contain a rename target; a null value
+ * is tolerated and means no rename.
+ * @param newMeta The value meta to add or replace; its name is changed when a rename is set.
+ */
+ private void replaceValueMeta(
+ IRowMeta inputRowMeta, FMLookupValue lookupValue, IValueMeta newMeta) {
+ int index = inputRowMeta.indexOfValue(newMeta.getName());
+ // Apply the configured rename, if any; the index above was resolved with the original name.
+ if (Objects.nonNull(lookupValue) &&
!Utils.isEmpty(lookupValue.getRename())) {
+ newMeta.setName(lookupValue.getRename());
+ }
+
+ // Append when the original name was not present, otherwise overwrite the entry at that index.
+ if (index == -1) {
+ inputRowMeta.addValueMeta(newMeta);
+ } else {
+ inputRowMeta.setValueMeta(index, newMeta);
+ }
+ }
+
@Override
public void check(
List<ICheckResult> remarks,
@@ -721,6 +748,8 @@ public class FuzzyMatchMeta extends
BaseTransformMeta<FuzzyMatch, FuzzyMatchData
}
}
+ @Setter
+ @Getter
public static final class FMLookupValue {
@HopMetadataProperty(key = "name")
private String name;
@@ -739,41 +768,5 @@ public class FuzzyMatchMeta extends
BaseTransformMeta<FuzzyMatch, FuzzyMatchData
this.name = name;
this.rename = rename;
}
-
- /**
- * Gets name
- *
- * @return value of name
- */
- public String getName() {
- return name;
- }
-
- /**
- * Sets name
- *
- * @param name value of name
- */
- public void setName(String name) {
- this.name = name;
- }
-
- /**
- * Gets rename
- *
- * @return value of rename
- */
- public String getRename() {
- return rename;
- }
-
- /**
- * Sets rename
- *
- * @param rename value of rename
- */
- public void setRename(String rename) {
- this.rename = rename;
- }
}
}
diff --git
a/plugins/transforms/fuzzymatch/src/main/samples/transforms/fuzzy-match-rename.hpl
b/plugins/transforms/fuzzymatch/src/main/samples/transforms/fuzzy-match-rename.hpl
new file mode 100644
index 0000000000..8790e20414
--- /dev/null
+++
b/plugins/transforms/fuzzymatch/src/main/samples/transforms/fuzzy-match-rename.hpl
@@ -0,0 +1,278 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<pipeline>
+ <info>
+ <name>fuzzy-match-rename</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <pipeline_version/>
+ <pipeline_type>Normal</pipeline_type>
+ <parameters>
+ </parameters>
+ <capture_transform_performance>N</capture_transform_performance>
+
<transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
+
<transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
+ <created_user>-</created_user>
+ <created_date>2024/10/24 14:53:48.724</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2024/10/24 14:53:48.724</modified_date>
+ </info>
+ <notepads>
+ </notepads>
+ <order>
+ <hop>
+ <from>Data grid</from>
+ <to>Fuzzy match</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Fuzzy match</from>
+ <to>Write to log</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Data grid 2</from>
+ <to>Fuzzy match</to>
+ <enabled>Y</enabled>
+ </hop>
+ </order>
+ <transform>
+ <name>Data grid</name>
+ <type>DataGrid</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <data>
+ <line>
+ <item>1</item>
+ <item>val.a</item>
+ <item>[email protected]</item>
+ <item><contacts><contact
id="1"><name>lance</name><email>[email protected]</email><phone>+1
555-234-7788</phone></contact></contacts></item>
+ </line>
+ </data>
+ <fields>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <currency/>
+ <set_empty_string>N</set_empty_string>
+ <name>id</name>
+ <format/>
+ <group/>
+ <decimal/>
+ <type>String</type>
+ </field>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <currency/>
+ <set_empty_string>N</set_empty_string>
+ <name>name</name>
+ <format/>
+ <group/>
+ <decimal/>
+ <type>String</type>
+ </field>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <currency/>
+ <set_empty_string>N</set_empty_string>
+ <name>address</name>
+ <format/>
+ <group/>
+ <decimal/>
+ <type>String</type>
+ </field>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <currency/>
+ <set_empty_string>N</set_empty_string>
+ <name>loginXml</name>
+ <format/>
+ <group/>
+ <decimal/>
+ <type>String</type>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>64</xloc>
+ <yloc>176</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Data grid 2</name>
+ <type>DataGrid</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <data>
+ <line>
+ <item>1</item>
+ <item>val.a</item>
+ <item>[email protected]</item>
+ <item><contacts><contact
id="1"><name>lance</name><email>[email protected]</email><phone>333-234-7788</phone></contact></contacts></item>
+ </line>
+ </data>
+ <fields>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <currency/>
+ <set_empty_string>N</set_empty_string>
+ <name>new_id</name>
+ <format/>
+ <group/>
+ <decimal/>
+ <type>String</type>
+ </field>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <currency/>
+ <set_empty_string>N</set_empty_string>
+ <name>new_name</name>
+ <format/>
+ <group/>
+ <decimal/>
+ <type>String</type>
+ </field>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <currency/>
+ <set_empty_string>N</set_empty_string>
+ <name>new_address</name>
+ <format/>
+ <group/>
+ <decimal/>
+ <type>String</type>
+ </field>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <currency/>
+ <set_empty_string>N</set_empty_string>
+ <name>loginXml</name>
+ <format/>
+ <group/>
+ <decimal/>
+ <type>String</type>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>64</xloc>
+ <yloc>288</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Fuzzy match</name>
+ <type>FuzzyMatch</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <algorithm>jarowinkler</algorithm>
+ <caseSensitive>N</caseSensitive>
+ <closervalue>Y</closervalue>
+ <from>Data grid 2</from>
+ <lookup>
+ <value>
+ <name>new_id</name>
+ <rename>user_id</rename>
+ </value>
+ <value>
+ <name>new_name</name>
+ <rename>user_name</rename>
+ </value>
+ <value>
+ <name>new_address</name>
+ </value>
+ </lookup>
+ <lookupfield>new_name</lookupfield>
+ <mainstreamfield>name</mainstreamfield>
+ <maximalValue>1</maximalValue>
+ <minimalValue>0</minimalValue>
+ <outputmatchfield>match</outputmatchfield>
+ <outputvaluefield>measure value</outputvaluefield>
+ <separator>,</separator>
+ <attributes/>
+ <GUI>
+ <xloc>224</xloc>
+ <yloc>224</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Write to log</name>
+ <type>WriteToLog</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <displayHeader>Y</displayHeader>
+ <fields>
+</fields>
+ <limitRows>N</limitRows>
+ <limitRowsNumber>0</limitRowsNumber>
+ <loglevel>Basic</loglevel>
+ <attributes/>
+ <GUI>
+ <xloc>384</xloc>
+ <yloc>224</yloc>
+ </GUI>
+ </transform>
+ <transform_error_handling>
+ <error>
+ <source_transform>Fuzzy match</source_transform>
+ <target_transform/>
+ <is_enabled>Y</is_enabled>
+ <nr_valuename/>
+ <descriptions_valuename/>
+ <fields_valuename/>
+ <codes_valuename/>
+ <max_errors/>
+ <max_pct_errors/>
+ <min_pct_rows/>
+ </error>
+ </transform_error_handling>
+ <attributes/>
+</pipeline>
diff --git
a/plugins/transforms/fuzzymatch/src/test/java/org/apache/hop/pipeline/transforms/fuzzymatch/FuzzyMatchMetaTest.java
b/plugins/transforms/fuzzymatch/src/test/java/org/apache/hop/pipeline/transforms/fuzzymatch/FuzzyMatchMetaTest.java
index db3b41fb63..189482b147 100644
---
a/plugins/transforms/fuzzymatch/src/test/java/org/apache/hop/pipeline/transforms/fuzzymatch/FuzzyMatchMetaTest.java
+++
b/plugins/transforms/fuzzymatch/src/test/java/org/apache/hop/pipeline/transforms/fuzzymatch/FuzzyMatchMetaTest.java
@@ -17,10 +17,20 @@
package org.apache.hop.pipeline.transforms.fuzzymatch;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.util.List;
+import org.apache.hop.core.row.IRowMeta;
+import org.apache.hop.core.row.IValueMeta;
+import org.apache.hop.core.row.RowMeta;
+import org.apache.hop.core.row.value.ValueMetaString;
+import org.apache.hop.core.variables.Variables;
import org.apache.hop.pipeline.transform.TransformSerializationTestUtil;
import org.junit.jupiter.api.Test;
+/** Unit test for {@link FuzzyMatchMeta} */
class FuzzyMatchMetaTest {
@Test
@@ -34,8 +44,8 @@ class FuzzyMatchMetaTest {
assertEquals("name", meta.getMainStreamField());
assertEquals("match", meta.getOutputMatchField());
assertEquals("measure value", meta.getOutputValueField());
- assertEquals(false, meta.isCaseSensitive());
- assertEquals(true, meta.isCloserValue());
+ assertFalse(meta.isCaseSensitive());
+ assertTrue(meta.isCloserValue());
assertEquals("0", meta.getMinimalValue());
assertEquals("1", meta.getMaximalValue());
assertEquals(",", meta.getSeparator());
@@ -44,4 +54,46 @@ class FuzzyMatchMetaTest {
assertEquals("name", meta.getLookupValues().get(0).getName());
assertEquals("lookupName", meta.getLookupValues().get(0).getRename());
}
+
+ @Test
+ void testGetFieldsRename() throws Exception {
+ FuzzyMatchMeta meta = new FuzzyMatchMeta();
+ meta.setCloserValue(true);
+ meta.setAlgorithm(FuzzyMatchMeta.Algorithm.JARO_WINKLER);
+ meta.setOutputMatchField("match");
+ meta.setOutputValueField("value");
+
+ String oldName = "old_name";
+ String newName = "new_name";
+ String noChangeName = "noChangeName";
+
+ // The first lookup value is renamed from oldName to newName; the second has no rename.
+ FuzzyMatchMeta.FMLookupValue lookupValue = new
FuzzyMatchMeta.FMLookupValue(oldName, newName);
+ FuzzyMatchMeta.FMLookupValue noChange = new
FuzzyMatchMeta.FMLookupValue(noChangeName, null);
+ meta.setLookupValues(List.of(lookupValue, noChange));
+
+ // Row meta of the main stream; it starts out empty.
+ IRowMeta inputRowMeta = new RowMeta();
+
+ // Row meta of the lookup (info) stream, holding both lookup fields under their original names.
+ IRowMeta lookupRowMeta = new RowMeta();
+ lookupRowMeta.addValueMeta(new ValueMetaString(oldName));
+ lookupRowMeta.addValueMeta(new ValueMetaString(noChangeName));
+
+ IRowMeta[] info = new IRowMeta[] {lookupRowMeta};
+
+ // Run getFields, which is expected to add the lookup fields to inputRowMeta.
+ meta.getFields(inputRowMeta, "FuzzyMatch", info, null, new Variables(),
null);
+
+ // The renamed field must be found under its new name, with the transform name as its origin.
+ IValueMeta result = inputRowMeta.searchValueMeta(newName);
+ assertNotNull(result);
+ assertEquals(newName, result.getName());
+ assertEquals("FuzzyMatch", result.getOrigin());
+
+ // The field without a rename must keep its original name.
+ result = inputRowMeta.searchValueMeta(noChangeName);
+ assertNotNull(result);
+ assertEquals(noChangeName, result.getName());
+ }
}