This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/main by this push:
new 762b9a0434 Issue #6603 (XML cleanup of Html2Text) (#6739)
762b9a0434 is described below
commit 762b9a04343364519433ee4645ac5e16ec5e8192
Author: Matt Casters <[email protected]>
AuthorDate: Mon Mar 9 15:30:45 2026 +0100
Issue #6603 (XML cleanup of Html2Text) (#6739)
---
integration-tests/database/0086-html2text.hpl | 134 +++++++++++++++
.../transforms/datasets/golden-html2text.csv | 18 ++
.../metadata/dataset/golden-html2text.json | 24 +++
.../metadata/unit-test/0086-html2text UNIT.json | 33 ++++
.../pipeline/transforms/html2text/Html2Text.java | 32 ++--
.../transforms/html2text/Html2TextDialog.java | 40 ++---
.../transforms/html2text/Html2TextMeta.java | 187 +++++----------------
.../transforms/html2text/Html2TextMetaTest.java | 37 ++--
.../html2text/src/test/resources/transform.xml | 25 +++
9 files changed, 319 insertions(+), 211 deletions(-)
diff --git a/integration-tests/database/0086-html2text.hpl b/integration-tests/database/0086-html2text.hpl
new file mode 100644
index 0000000000..cab436ac20
--- /dev/null
+++ b/integration-tests/database/0086-html2text.hpl
@@ -0,0 +1,134 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<pipeline>
+ <info>
+ <name>0086-html2text</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <pipeline_version/>
+ <pipeline_type>Normal</pipeline_type>
+ <parameters>
+ </parameters>
+ <capture_transform_performance>N</capture_transform_performance>
+ <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
+ <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
+ <created_user>-</created_user>
+ <created_date>2026/03/08 11:34:47.320</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2026/03/08 11:34:47.320</modified_date>
+ </info>
+ <notepads>
+ </notepads>
+ <order>
+ <hop>
+ <from>basic-html</from>
+ <to>HTML to Text</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>HTML to Text</from>
+ <to>Verify</to>
+ <enabled>Y</enabled>
+ </hop>
+ </order>
+ <transform>
+ <name>HTML to Text</name>
+ <type>Html2Text</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <cleanOnly>N</cleanOnly>
+ <htmlField>html</htmlField>
+ <normalisedText>N</normalisedText>
+ <outputField>html2text_output</outputField>
+ <parallelism>N</parallelism>
+ <safelistType>basic</safelistType>
+ <attributes/>
+ <GUI>
+ <xloc>224</xloc>
+ <yloc>112</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>basic-html</name>
+ <type>DataGrid</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <data>
+ <line>
+ <item><!DOCTYPE html>
+<html>
+<body>
+
+<h1>My First Heading</h1>
+<p>My first paragraph.</p>
+
+</body>
+</html></item>
+ </line>
+ </data>
+ <fields>
+ <field>
+ <length>-1</length>
+ <precision>-1</precision>
+ <set_empty_string>N</set_empty_string>
+ <name>html</name>
+ <type>String</type>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>128</xloc>
+ <yloc>112</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Verify</name>
+ <type>Dummy</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <attributes/>
+ <GUI>
+ <xloc>336</xloc>
+ <yloc>112</yloc>
+ </GUI>
+ </transform>
+ <transform_error_handling>
+ </transform_error_handling>
+ <attributes/>
+</pipeline>
diff --git a/integration-tests/transforms/datasets/golden-html2text.csv b/integration-tests/transforms/datasets/golden-html2text.csv
new file mode 100644
index 0000000000..eb4b87dcf4
--- /dev/null
+++ b/integration-tests/transforms/datasets/golden-html2text.csv
@@ -0,0 +1,18 @@
+html,html2text_output
+"<!DOCTYPE html>
+<html>
+<body>
+
+<h1>My First Heading</h1>
+<p>My first paragraph.</p>
+
+</body>
+</html>","
+
+
+
+My First Heading
+My first paragraph.
+
+
+"
diff --git a/integration-tests/transforms/metadata/dataset/golden-html2text.json b/integration-tests/transforms/metadata/dataset/golden-html2text.json
new file mode 100644
index 0000000000..77f9c1185f
--- /dev/null
+++ b/integration-tests/transforms/metadata/dataset/golden-html2text.json
@@ -0,0 +1,24 @@
+{
+ "base_filename": "golden-html2text.csv",
+ "name": "golden-html2text",
+ "description": "",
+ "dataset_fields": [
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_name": "html",
+ "field_format": ""
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_name": "html2text_output",
+ "field_format": ""
+ }
+ ],
+ "folder_name": ""
+}
\ No newline at end of file
diff --git a/integration-tests/transforms/metadata/unit-test/0086-html2text UNIT.json b/integration-tests/transforms/metadata/unit-test/0086-html2text UNIT.json
new file mode 100644
index 0000000000..01e2664873
--- /dev/null
+++ b/integration-tests/transforms/metadata/unit-test/0086-html2text UNIT.json
@@ -0,0 +1,33 @@
+{
+ "database_replacements": [],
+ "autoOpening": true,
+ "description": "",
+ "persist_filename": "",
+ "test_type": "UNIT_TEST",
+ "variableValues": [],
+ "basePath": "${HOP_UNIT_TESTS_FOLDER}",
+ "golden_data_sets": [
+ {
+ "field_mappings": [
+ {
+ "transform_field": "html",
+ "data_set_field": "html"
+ },
+ {
+ "transform_field": "html2text_output",
+ "data_set_field": "html2text_output"
+ }
+ ],
+ "field_order": [
+ "html",
+ "html2text_output"
+ ],
+ "data_set_name": "golden-html2text",
+ "transform_name": "Verify"
+ }
+ ],
+ "input_data_sets": [],
+ "name": "0086-html2text UNIT",
+ "trans_test_tweaks": [],
+ "pipeline_filename": "/home/matt/git/mattcasters/hop/integration-tests/database/0086-html2text.hpl"
+}
\ No newline at end of file
diff --git
a/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2Text.java
b/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2Text.java
index 6cece768de..e73b4a80bb 100644
---
a/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2Text.java
+++
b/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2Text.java
@@ -35,14 +35,12 @@ import org.apache.hop.pipeline.Pipeline;
import org.apache.hop.pipeline.PipelineMeta;
import org.apache.hop.pipeline.transform.BaseTransform;
import org.apache.hop.pipeline.transform.TransformMeta;
-import org.apache.hop.pipeline.transforms.html2text.Html2TextMeta.SafelistType;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Safelist;
public class Html2Text extends BaseTransform<Html2TextMeta, Html2TextData> {
public static final int OVER_ALLOCATE_SIZE = 10;
private static final Class<?> PKG = Html2Text.class; // For Translator
- private SafelistType safelistType;
private final int cores = Runtime.getRuntime().availableProcessors();
private final int maxJobs = cores * 100;
@@ -61,14 +59,14 @@ public class Html2Text extends BaseTransform<Html2TextMeta,
Html2TextData> {
super(transformMeta, meta, data, copyNr, pipelineMeta, pipeline);
}
- private void finish() {
+ private void finish() throws HopException {
// Wait until all jobs are finished.
synchronized (lock) {
while (jobs.get() > 0) {
try {
lock.wait();
} catch (InterruptedException e) {
- throw new RuntimeException(e);
+ throw new HopException("Waiting for jobs to finished interrupted", e);
}
}
}
@@ -100,11 +98,8 @@ public class Html2Text extends BaseTransform<Html2TextMeta,
Html2TextData> {
}
// cache the position of the field
cacheIndexPositions();
-
- this.safelistType = SafelistType.valueOf(meta.getSafelistType());
} // End If first
- boolean sendToErrorRow = false;
String errorMessage = null;
process(r);
@@ -119,7 +114,6 @@ public class Html2Text extends BaseTransform<Html2TextMeta,
Html2TextData> {
}
} catch (Exception e) {
if (getTransformMeta().isDoingErrorHandling()) {
- sendToErrorRow = true;
errorMessage = e.toString();
} else {
logError(BaseMessages.getString(PKG,
"Html2Text.ErrorInTransformRunning") + e.getMessage());
@@ -128,10 +122,9 @@ public class Html2Text extends
BaseTransform<Html2TextMeta, Html2TextData> {
setOutputDone(); // signal end to receiver(s)
return false;
}
- if (sendToErrorRow) {
- // Simply add this row to the error row
- putError(getInputRowMeta(), r, 1, errorMessage, meta.getHtmlField(),
"Html2Text001");
- }
+
+ // Add this row to the error output of this transform
+ putError(getInputRowMeta(), r, 1, errorMessage, meta.getHtmlField(),
"Html2Text001");
}
return true;
@@ -150,13 +143,13 @@ public class Html2Text extends
BaseTransform<Html2TextMeta, Html2TextData> {
}
}
- private void processAsync(String html, Object[] inputRow) {
+ private void processAsync(String html, Object[] inputRow) throws
HopTransformException {
synchronized (lock) {
while (jobs.get() > maxJobs) {
try {
lock.wait();
} catch (InterruptedException e) {
- throw new RuntimeException(e);
+ throw new HopTransformException("Error waiting for async processing lock release", e);
}
}
}
@@ -184,12 +177,11 @@ public class Html2Text extends
BaseTransform<Html2TextMeta, Html2TextData> {
if (meta.isCleanOnly()) {
Safelist safelist;
- switch (safelistType) {
- case basic -> safelist = Safelist.basic();
- case simpleText -> safelist = Safelist.simpleText();
- case basicWithImages -> safelist = Safelist.basicWithImages();
- case none -> safelist = Safelist.none();
- case relaxed -> safelist = Safelist.relaxed();
+ switch (meta.getSafelistType()) {
+ case SIMPLE_TEXT -> safelist = Safelist.simpleText();
+ case BASIC_WITH_IMAGES -> safelist = Safelist.basicWithImages();
+ case NONE -> safelist = Safelist.none();
+ case RELAXED -> safelist = Safelist.relaxed();
default -> safelist = Safelist.basic();
}
diff --git
a/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2TextDialog.java
b/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2TextDialog.java
index 4283ccdcdb..359f39035c 100644
---
a/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2TextDialog.java
+++
b/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2TextDialog.java
@@ -19,7 +19,6 @@ package org.apache.hop.pipeline.transforms.html2text;
import static org.apache.hop.core.util.Utils.isEmpty;
import static org.apache.hop.i18n.BaseMessages.getString;
-import static
org.apache.hop.pipeline.transforms.html2text.Html2TextMeta.SafelistType.getTypeFromDescription;
import static org.eclipse.swt.SWT.BORDER;
import static org.eclipse.swt.SWT.CHECK;
import static org.eclipse.swt.SWT.CURSOR_WAIT;
@@ -28,6 +27,7 @@ import static org.eclipse.swt.SWT.READ_ONLY;
import static org.eclipse.swt.SWT.RIGHT;
import static org.eclipse.swt.SWT.SINGLE;
+import org.apache.hop.core.Const;
import org.apache.hop.core.exception.HopException;
import org.apache.hop.core.row.IRowMeta;
import org.apache.hop.core.variables.IVariables;
@@ -177,7 +177,7 @@ public class Html2TextDialog extends BaseTransformDialog
implements ITransformDi
wNormalisedText.setToolTipText(getString(PKG,
"Html2TextDialog.NormalisedText.Tooltip"));
FormData fdNormalisedText = new FormData();
fdNormalisedText.left = new FormAttachment(middle, -margin);
- fdNormalisedText.top = new FormAttachment(wOutputField, margin);
+ fdNormalisedText.top = new FormAttachment(wlNormalisedText, 0, SWT.CENTER);
fdNormalisedText.right = new FormAttachment(100, 0);
wNormalisedText.setLayoutData(fdNormalisedText);
wNormalisedText.addSelectionListener(
@@ -202,7 +202,7 @@ public class Html2TextDialog extends BaseTransformDialog
implements ITransformDi
// wCleanOnly.setToolTipText(getString(PKG,
"Html2TextDialog.CleanOnly.Tooltip"));
FormData fdCleanOnly = new FormData();
fdCleanOnly.left = new FormAttachment(middle, -margin);
- fdCleanOnly.top = new FormAttachment(wNormalisedText, margin);
+ fdCleanOnly.top = new FormAttachment(wlCleanOnly, 0, SWT.CENTER);
fdCleanOnly.right = new FormAttachment(100, 0);
wCleanOnly.setLayoutData(fdCleanOnly);
@@ -265,7 +265,7 @@ public class Html2TextDialog extends BaseTransformDialog
implements ITransformDi
wParallelism.setToolTipText(getString(PKG,
"Html2TextDialog.Parallelism.Tooltip"));
FormData fdParallelism = new FormData();
fdParallelism.left = new FormAttachment(middle, -margin);
- fdParallelism.top = new FormAttachment(wSafelistType, margin);
+ fdParallelism.top = new FormAttachment(wlParallelism, 0, SWT.CENTER);
fdParallelism.right = new FormAttachment(100, 0);
wParallelism.setLayoutData(fdParallelism);
wParallelism.addSelectionListener(
@@ -294,28 +294,12 @@ public class Html2TextDialog extends BaseTransformDialog
implements ITransformDi
/** Copy information from the meta-data input to the dialog fields. */
public void getData() {
- if (input.getHtmlField() != null) {
- wHtmlFieldName.setText(input.getHtmlField());
- }
-
- if (input.isParallelism()) {
- wParallelism.setEnabled(input.isParallelism());
- }
-
- if (input.isCleanOnly()) {
- wCleanOnly.setEnabled(input.isCleanOnly());
- }
-
- if (input.isNormalisedText()) {
- wNormalisedText.setEnabled(input.isNormalisedText());
- }
-
- if (input.getSafelistType() != null) {
- String d =
SafelistType.valueOf(input.getSafelistType()).getDescription();
- wSafelistType.setText(d);
- }
-
- wOutputField.setText(String.valueOf(input.getOutputField()));
+ wHtmlFieldName.setText(Const.NVL(input.getHtmlField(), ""));
+ wParallelism.setSelection(input.isParallelism());
+ wCleanOnly.setSelection(input.isCleanOnly());
+ wNormalisedText.setSelection(input.isNormalisedText());
+ wSafelistType.setText(input.getSafelistType().getDescription());
+ wOutputField.setText(Const.NVL(input.getOutputField(), ""));
}
private void cancel() {
@@ -330,9 +314,7 @@ public class Html2TextDialog extends BaseTransformDialog
implements ITransformDi
}
input.setHtmlField(wHtmlFieldName.getText());
-
- input.setSafelistType(getTypeFromDescription(wSafelistType.getText()).getCode());
-
+ input.setSafelistType(SafelistType.getTypeFromDescription(wSafelistType.getText()));
input.setOutputField(wOutputField.getText());
input.setCleanOnly(wCleanOnly.getSelection());
input.setNormalisedText(wNormalisedText.getSelection());
diff --git
a/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2TextMeta.java
b/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2TextMeta.java
index 631c884447..6e065ffc8a 100644
---
a/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2TextMeta.java
+++
b/plugins/transforms/html2text/src/main/java/org/apache/hop/pipeline/transforms/html2text/Html2TextMeta.java
@@ -17,31 +17,28 @@
package org.apache.hop.pipeline.transforms.html2text;
-import static org.apache.commons.lang3.StringUtils.equalsIgnoreCase;
import static org.apache.hop.core.ICheckResult.TYPE_RESULT_ERROR;
import static org.apache.hop.core.ICheckResult.TYPE_RESULT_OK;
import static org.apache.hop.core.util.Utils.isEmpty;
-import static org.apache.hop.core.xml.XmlHandler.addTagValue;
-import static org.apache.hop.core.xml.XmlHandler.getTagValue;
import static org.apache.hop.i18n.BaseMessages.getString;
-import static
org.apache.hop.pipeline.transforms.html2text.Html2TextMeta.SafelistType.basic;
+import static
org.apache.hop.pipeline.transforms.html2text.Html2TextMeta.SafelistType.BASIC;
import java.util.List;
+import lombok.Getter;
+import lombok.Setter;
import org.apache.hop.core.CheckResult;
import org.apache.hop.core.ICheckResult;
import org.apache.hop.core.annotations.Transform;
-import org.apache.hop.core.exception.HopXmlException;
import org.apache.hop.core.row.IRowMeta;
import org.apache.hop.core.row.IValueMeta;
-import org.apache.hop.core.row.value.ValueMetaBoolean;
-import org.apache.hop.core.row.value.ValueMetaInteger;
import org.apache.hop.core.row.value.ValueMetaString;
import org.apache.hop.core.variables.IVariables;
+import org.apache.hop.metadata.api.HopMetadataProperty;
+import org.apache.hop.metadata.api.IEnumHasCodeAndDescription;
import org.apache.hop.metadata.api.IHopMetadataProvider;
import org.apache.hop.pipeline.PipelineMeta;
import org.apache.hop.pipeline.transform.BaseTransformMeta;
import org.apache.hop.pipeline.transform.TransformMeta;
-import org.w3c.dom.Node;
@Transform(
id = "Html2Text",
@@ -50,90 +47,49 @@ import org.w3c.dom.Node;
description = "i18n::BaseTransform.TypeTooltipDesc.Html2Text",
categoryDescription =
"i18n:org.apache.hop.pipeline.transform:BaseTransform.Category.Transform",
documentationUrl = "/pipeline/transforms/html2text.html")
+@Getter
+@Setter
public class Html2TextMeta extends BaseTransformMeta<Html2Text, Html2TextData>
{
private static final Class<?> PKG = Html2TextMeta.class; // For Translator
+ @HopMetadataProperty(key = "htmlField")
private String htmlField;
- private String outputField = "html2text_output";
- private String safelistType = basic.getCode();
- private boolean parallelism = false;
- private boolean normalisedText = false;
- private boolean cleanOnly = false;
- public Html2TextMeta() {
- super();
- }
+ @HopMetadataProperty(key = "outputField")
+ private String outputField;
- @Override
- public void setDefault() {
- normalisedText = false;
- parallelism = false;
- cleanOnly = false;
- outputField = "html2text_output";
- safelistType = basic.getCode();
- }
+ @HopMetadataProperty(key = "safelistType", storeWithCode = true)
+ private SafelistType safelistType;
- @Override
- public void loadXml(Node transformNode, IHopMetadataProvider
metadataProvider)
- throws HopXmlException {
- try {
- htmlField = getTagValue(transformNode, "htmlField");
- outputField = getTagValue(transformNode, "outputField");
- safelistType = getTagValue(transformNode, "safelistType");
- cleanOnly = equalsIgnoreCase("Y", getTagValue(transformNode,
"cleanOnly"));
- parallelism = equalsIgnoreCase("Y", getTagValue(transformNode,
"parallelism"));
- normalisedText = equalsIgnoreCase("Y", getTagValue(transformNode,
"normalisedText"));
+ @HopMetadataProperty(key = "cleanOnly")
+ private boolean cleanOnly;
- } catch (Exception e) {
- throw new HopXmlException(
- getString(PKG, "Html2TextMeta.Exception.UnableToReadTransformMeta"),
e);
- }
+ @HopMetadataProperty(key = "parallelism")
+ private boolean parallelism;
+
+ @HopMetadataProperty(key = "normalisedText")
+ private boolean normalisedText;
+
+ public Html2TextMeta() {
+ super();
+ this.outputField = "html2text_output";
+ this.safelistType = BASIC;
}
@Override
public void getFields(
- IRowMeta r,
+ IRowMeta inputRowMeta,
String name,
IRowMeta[] info,
TransformMeta nextTransform,
IVariables variables,
IHopMetadataProvider metadataProvider) {
- valueMetaString(r, name, outputField);
- }
-
- private void valueMetaString(IRowMeta r, String name, String metaName) {
- IValueMeta sText = new ValueMetaString(metaName);
- sText.setOrigin(name);
- r.addValueMeta(sText);
- }
-
- private void valueMetaBoolean(IRowMeta r, String name, String metaName) {
- IValueMeta sText = new ValueMetaBoolean(metaName);
- sText.setOrigin(name);
- r.addValueMeta(sText);
- }
-
- private void valueMetaInteger(IRowMeta r, String name, String metaName) {
- IValueMeta sText = new ValueMetaInteger(metaName);
- sText.setOrigin(name);
- r.addValueMeta(sText);
- }
-
- @Override
- public String getXml() {
- return " "
- + addTagValue("htmlField", htmlField)
- + " "
- + addTagValue("outputField", outputField)
- + " "
- + addTagValue("safelistType", safelistType)
- + " "
- + addTagValue("cleanOnly", cleanOnly)
- + " "
- + addTagValue("normalisedText", normalisedText)
- + " "
- + addTagValue("parallelism", parallelism);
+ // We simply add the output field to contain the generated text.
+ //
+ IValueMeta outputFieldMeta = new ValueMetaString(outputField);
+ outputFieldMeta.setOrigin(name);
+ inputRowMeta.addValueMeta(outputFieldMeta);
}
@Override
@@ -201,60 +157,13 @@ public class Html2TextMeta extends
BaseTransformMeta<Html2Text, Html2TextData> {
return true;
}
- public String getHtmlField() {
- return htmlField;
- }
-
- public void setHtmlField(String htmlField) {
- this.htmlField = htmlField;
- }
-
- public String getOutputField() {
- return outputField;
- }
-
- public void setOutputField(String outputField) {
- this.outputField = outputField;
- }
-
- public boolean isParallelism() {
- return parallelism;
- }
-
- public void setParallelism(boolean parallelism) {
- this.parallelism = parallelism;
- }
-
- public String getSafelistType() {
- return safelistType;
- }
-
- public void setSafelistType(String safelistType) {
- this.safelistType = safelistType;
- }
-
- public boolean isCleanOnly() {
- return cleanOnly;
- }
-
- public void setCleanOnly(boolean cleanOnly) {
- this.cleanOnly = cleanOnly;
- }
-
- public boolean isNormalisedText() {
- return normalisedText;
- }
-
- public void setNormalisedText(boolean normalisedText) {
- this.normalisedText = normalisedText;
- }
-
- public enum SafelistType {
- none("none", getString(PKG, "Html2TextDialog.SafelistType.none")),
- relaxed("relaxed", getString(PKG, "Html2TextDialog.SafelistType.relaxed")),
- basic("basic", getString(PKG, "Html2TextDialog.SafelistType.basic")),
- simpleText("simpleText", getString(PKG,
"Html2TextDialog.SafelistType.simpleText")),
- basicWithImages(
+ @Getter
+ public enum SafelistType implements IEnumHasCodeAndDescription {
+ NONE("none", getString(PKG, "Html2TextDialog.SafelistType.none")),
+ RELAXED("relaxed", getString(PKG, "Html2TextDialog.SafelistType.relaxed")),
+ BASIC("basic", getString(PKG, "Html2TextDialog.SafelistType.basic")),
+ SIMPLE_TEXT("simpleText", getString(PKG,
"Html2TextDialog.SafelistType.simpleText")),
+ BASIC_WITH_IMAGES(
"basicWithImages", getString(PKG,
"Html2TextDialog.SafelistType.basicWithImages"));
private final String code;
@@ -266,29 +175,11 @@ public class Html2TextMeta extends
BaseTransformMeta<Html2Text, Html2TextData> {
}
public static SafelistType getTypeFromDescription(String description) {
- for (SafelistType type : values()) {
- if (equalsIgnoreCase(type.description, description)) {
- return type;
- }
- }
- return basic;
+ return IEnumHasCodeAndDescription.lookupDescription(SafelistType.class,
description, BASIC);
}
public static String[] getDescriptions() {
- SafelistType[] types = SafelistType.values();
- String[] descriptions = new String[types.length];
- for (int i = 0; i < types.length; i++) {
- descriptions[i] = types[i].description;
- }
- return descriptions;
- }
-
- public String getCode() {
- return code;
- }
-
- public String getDescription() {
- return description;
+ return IEnumHasCodeAndDescription.getDescriptions(SafelistType.class);
}
}
}
diff --git
a/plugins/transforms/html2text/src/test/java/org/apache/hop/pipeline/transforms/html2text/Html2TextMetaTest.java
b/plugins/transforms/html2text/src/test/java/org/apache/hop/pipeline/transforms/html2text/Html2TextMetaTest.java
index e887a1c09b..041e401389 100644
---
a/plugins/transforms/html2text/src/test/java/org/apache/hop/pipeline/transforms/html2text/Html2TextMetaTest.java
+++
b/plugins/transforms/html2text/src/test/java/org/apache/hop/pipeline/transforms/html2text/Html2TextMetaTest.java
@@ -17,25 +17,34 @@
package org.apache.hop.pipeline.transforms.html2text;
-import java.util.Arrays;
-import java.util.List;
-import org.apache.hop.core.exception.HopException;
-import org.apache.hop.junit.rules.RestoreHopEngineEnvironmentExtension;
-import org.apache.hop.pipeline.transforms.loadsave.LoadSaveTester;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import org.apache.hop.core.xml.XmlHandler;
+import org.apache.hop.metadata.serializer.memory.MemoryMetadataProvider;
+import org.apache.hop.metadata.serializer.xml.XmlMetadataUtil;
+import org.apache.hop.pipeline.transform.TransformMeta;
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.RegisterExtension;
class Html2TextMetaTest {
- @RegisterExtension
- static RestoreHopEngineEnvironmentExtension env = new
RestoreHopEngineEnvironmentExtension();
@Test
- void testLoadSave() throws HopException {
- List<String> attributes = Arrays.asList("htmlField", "outputField");
+ void testLoadSave() throws Exception {
+ Path path = Paths.get(getClass().getResource("/transform.xml").toURI());
+ String xml = Files.readString(path);
+ Html2TextMeta meta = new Html2TextMeta();
+ XmlMetadataUtil.deSerializeFromXml(
+ XmlHandler.loadXmlString(xml, TransformMeta.XML_TAG),
+ Html2TextMeta.class,
+ meta,
+ new MemoryMetadataProvider());
- LoadSaveTester<Html2TextMeta> loadSaveTester =
- new LoadSaveTester<>(Html2TextMeta.class, attributes);
-
- loadSaveTester.testSerialization();
+ Assertions.assertEquals("html", meta.getHtmlField());
+ Assertions.assertEquals("outputField", meta.getOutputField());
+ Assertions.assertEquals(Html2TextMeta.SafelistType.BASIC,
meta.getSafelistType());
+ Assertions.assertTrue(meta.isCleanOnly());
+ Assertions.assertTrue(meta.isNormalisedText());
+ Assertions.assertTrue(meta.isParallelism());
}
}
diff --git a/plugins/transforms/html2text/src/test/resources/transform.xml
b/plugins/transforms/html2text/src/test/resources/transform.xml
new file mode 100644
index 0000000000..cb8f56348b
--- /dev/null
+++ b/plugins/transforms/html2text/src/test/resources/transform.xml
@@ -0,0 +1,25 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ ~
+ -->
+<transform>
+ <cleanOnly>Y</cleanOnly>
+ <htmlField>html</htmlField>
+ <normalisedText>Y</normalisedText>
+ <outputField>outputField</outputField>
+ <parallelism>Y</parallelism>
+ <safelistType>basic</safelistType>
+</transform>
\ No newline at end of file