Copilot commented on code in PR #9333:
URL: https://github.com/apache/seatunnel/pull/9333#discussion_r2094789498
##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/regexparse/RegexParseTransformConfig.java:
##########
@@ -0,0 +1,32 @@
+ package org.apache.seatunnel.transform.regexparse;
+
+ import org.apache.seatunnel.api.configuration.Option;
+ import org.apache.seatunnel.api.configuration.Options;
+
+ import java.io.Serializable;
+ import java.util.Map;
+
+
+ public class RegexParseTransformConfig implements Serializable {
+ private static final long serialVersionUID = -930897758226053570L;
+ public static final Option<String> REGEX_PARSE_FIELD =
+ Options.key("regex_parse_field")
+ .stringType()
+ .noDefaultValue()
+ .withDescription(
+ "Upstream fields that require parsing");
Review Comment:
[nitpick] The description says 'fields' but only a single field is
supported; consider changing it to 'Upstream field that requires parsing'.
```suggestion
"Upstream field that requires parsing");
```
##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/regexparse/RegexParseTransform.java:
##########
@@ -0,0 +1,92 @@
+ package org.apache.seatunnel.transform.regexparse;
+
+ import lombok.extern.slf4j.Slf4j;
+ import org.apache.commons.lang3.StringUtils;
+ import org.apache.commons.lang3.math.NumberUtils;
+ import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+ import org.apache.seatunnel.api.table.catalog.Column;
+ import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+ import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+ import org.apache.seatunnel.api.table.catalog.TableSchema;
+ import org.apache.seatunnel.api.table.factory.TableTransformFactoryContext;
+ import org.apache.seatunnel.api.table.type.BasicType;
+ import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+ import
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform;
+
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.regex.Matcher;
+ import java.util.regex.Pattern;
+
+
+ @Slf4j
+ public class RegexParseTransform extends AbstractCatalogSupportMapTransform {
+ private int fieldIndex = -1;
+ private final Pattern regex;
+ private final Map<String, String> groupMap;
+
+ public RegexParseTransform(TableTransformFactoryContext context) {
+ super(context.getCatalogTables().get(0));
+ ReadonlyConfig options = context.getOptions();
+ String regexParseField =
options.get(RegexParseTransformConfig.REGEX_PARSE_FIELD);
+ this.regex =
Pattern.compile(options.get(RegexParseTransformConfig.REGEX));
+ this.groupMap = options.get(RegexParseTransformConfig.GROUP_MAP);
+ List<Column> columns =
context.getCatalogTables().get(0).getTableSchema().getColumns();
+ for (int i = 0; i < columns.size(); i++) {
+ if (regexParseField.equals(columns.get(i).getName())) {
+ if
(!BasicType.STRING_TYPE.equals(columns.get(i).getDataType())) {
+ throw new RuntimeException("regex_parse_field type must be
string");
+ }
+ fieldIndex = i;
+ }
+ }
+ if (fieldIndex == -1) {
+ throw new RuntimeException("regex_parse_field not Contained");
+ }
+ }
+
+ @Override
+ protected TableSchema transformTableSchema() {
+ List<Column> oldColumns =
inputCatalogTable.getTableSchema().getColumns();
+ List<Column> newColumns = new ArrayList<>();
+ for (String key : groupMap.keySet()) {
Review Comment:
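Iterating over a Map's keySet without a defined order can lead to
unpredictable column ordering. Consider enforcing a stable order (e.g., using a
LinkedHashMap or sorting the keys). Whatever order is chosen must also be used
in `transformRow`, which fills the appended fields from `groupMap.values()`;
reordering the keys only here would misalign column names and extracted values.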
```suggestion
List<String> sortedKeys = new ArrayList<>(groupMap.keySet());
sortedKeys.sort(String::compareTo);
for (String key : sortedKeys) {
```
##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/regexparse/RegexParseTransform.java:
##########
@@ -0,0 +1,92 @@
+ package org.apache.seatunnel.transform.regexparse;
+
+ import lombok.extern.slf4j.Slf4j;
+ import org.apache.commons.lang3.StringUtils;
+ import org.apache.commons.lang3.math.NumberUtils;
+ import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+ import org.apache.seatunnel.api.table.catalog.Column;
+ import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+ import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+ import org.apache.seatunnel.api.table.catalog.TableSchema;
+ import org.apache.seatunnel.api.table.factory.TableTransformFactoryContext;
+ import org.apache.seatunnel.api.table.type.BasicType;
+ import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+ import
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform;
+
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.regex.Matcher;
+ import java.util.regex.Pattern;
+
+
+ @Slf4j
+ public class RegexParseTransform extends AbstractCatalogSupportMapTransform {
+ private int fieldIndex = -1;
+ private final Pattern regex;
+ private final Map<String, String> groupMap;
+
+ public RegexParseTransform(TableTransformFactoryContext context) {
+ super(context.getCatalogTables().get(0));
+ ReadonlyConfig options = context.getOptions();
+ String regexParseField =
options.get(RegexParseTransformConfig.REGEX_PARSE_FIELD);
+ this.regex =
Pattern.compile(options.get(RegexParseTransformConfig.REGEX));
+ this.groupMap = options.get(RegexParseTransformConfig.GROUP_MAP);
+ List<Column> columns =
context.getCatalogTables().get(0).getTableSchema().getColumns();
+ for (int i = 0; i < columns.size(); i++) {
+ if (regexParseField.equals(columns.get(i).getName())) {
+ if
(!BasicType.STRING_TYPE.equals(columns.get(i).getDataType())) {
+ throw new RuntimeException("regex_parse_field type must be
string");
+ }
+ fieldIndex = i;
+ }
+ }
+ if (fieldIndex == -1) {
+ throw new RuntimeException("regex_parse_field not Contained");
+ }
+ }
+
+ @Override
+ protected TableSchema transformTableSchema() {
+ List<Column> oldColumns =
inputCatalogTable.getTableSchema().getColumns();
+ List<Column> newColumns = new ArrayList<>();
+ for (String key : groupMap.keySet()) {
+ newColumns.add(PhysicalColumn.of(key, BasicType.STRING_TYPE, 200,
true, null, ""));
+ }
+ newColumns.addAll(0, oldColumns);
+ return TableSchema.builder().columns(newColumns).build();
+ }
+
+ @Override
+ protected TableIdentifier transformTableIdentifier() {
+ return inputCatalogTable.getTableId().copy();
+ }
+
+ @Override
+ protected SeaTunnelRow transformRow(SeaTunnelRow inputRow) {
+ try {
+ Object[] oldFields = inputRow.getFields();
+ String rowValue = String.valueOf(inputRow.getField(fieldIndex));
+ int groupSize = groupMap.size();
+ Object[] merged = Arrays.copyOf(oldFields, oldFields.length +
groupSize);
+ Matcher matcher = regex.matcher(rowValue);
+ if (StringUtils.isBlank(rowValue) || !matcher.matches()) {
+ return new SeaTunnelRow(merged);
+ }
+ Object[] extracted = groupMap.values().stream()
+ .map(index -> matcher.group(NumberUtils.toInt(index)))
+ .toArray();
+ System.arraycopy(extracted, 0, merged, oldFields.length,
groupSize);
+ return new SeaTunnelRow(merged);
+ } catch (Exception e) {
+ throw new RuntimeException("RegexParse->" + e.getMessage());
Review Comment:
Rethrowing with only `e.getMessage()` discards the original exception's type
and stack trace. Pass the original exception as the cause, e.g., `new
RuntimeException("RegexParse->" + e.getMessage(), e)`.
```suggestion
throw new RuntimeException("RegexParse->" + e.getMessage(), e);
```
##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/regexparse/RegexParseTransform.java:
##########
@@ -0,0 +1,92 @@
+ package org.apache.seatunnel.transform.regexparse;
+
+ import lombok.extern.slf4j.Slf4j;
+ import org.apache.commons.lang3.StringUtils;
+ import org.apache.commons.lang3.math.NumberUtils;
+ import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+ import org.apache.seatunnel.api.table.catalog.Column;
+ import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+ import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+ import org.apache.seatunnel.api.table.catalog.TableSchema;
+ import org.apache.seatunnel.api.table.factory.TableTransformFactoryContext;
+ import org.apache.seatunnel.api.table.type.BasicType;
+ import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+ import
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform;
+
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.regex.Matcher;
+ import java.util.regex.Pattern;
+
+
+ @Slf4j
+ public class RegexParseTransform extends AbstractCatalogSupportMapTransform {
+ private int fieldIndex = -1;
+ private final Pattern regex;
+ private final Map<String, String> groupMap;
+
+ public RegexParseTransform(TableTransformFactoryContext context) {
+ super(context.getCatalogTables().get(0));
+ ReadonlyConfig options = context.getOptions();
+ String regexParseField =
options.get(RegexParseTransformConfig.REGEX_PARSE_FIELD);
+ this.regex =
Pattern.compile(options.get(RegexParseTransformConfig.REGEX));
+ this.groupMap = options.get(RegexParseTransformConfig.GROUP_MAP);
+ List<Column> columns =
context.getCatalogTables().get(0).getTableSchema().getColumns();
+ for (int i = 0; i < columns.size(); i++) {
+ if (regexParseField.equals(columns.get(i).getName())) {
+ if
(!BasicType.STRING_TYPE.equals(columns.get(i).getDataType())) {
+ throw new RuntimeException("regex_parse_field type must be
string");
+ }
+ fieldIndex = i;
+ }
+ }
+ if (fieldIndex == -1) {
+ throw new RuntimeException("regex_parse_field not Contained");
+ }
+ }
+
+ @Override
+ protected TableSchema transformTableSchema() {
+ List<Column> oldColumns =
inputCatalogTable.getTableSchema().getColumns();
+ List<Column> newColumns = new ArrayList<>();
+ for (String key : groupMap.keySet()) {
+ newColumns.add(PhysicalColumn.of(key, BasicType.STRING_TYPE, 200,
true, null, ""));
Review Comment:
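The hard-coded column length '200' is a magic number. Extract it into a
named constant or configuration parameter. The suggestion below assumes a
constant such as `private static final int DEFAULT_COLUMN_LENGTH = 200;` is
declared in `RegexParseTransform`.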
```suggestion
newColumns.add(PhysicalColumn.of(key, BasicType.STRING_TYPE,
DEFAULT_COLUMN_LENGTH, true, null, ""));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]