This is an automated email from the ASF dual-hosted git repository.
fanjia pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new 00c5aed1af [BugFix][Excel] Fix read formulas/number cell value of
excel (#8316)
00c5aed1af is described below
commit 00c5aed1afce0ae64fa27f473d8f88cbf5829543
Author: Tu-maimes <[email protected]>
AuthorDate: Mon Dec 23 17:45:46 2024 +0800
[BugFix][Excel] Fix read formulas/number cell value of excel (#8316)
Co-authored-by: Tu-maimes <[email protected]>
---
.../file/source/reader/ExcelReadStrategy.java | 30 ++++-
.../file/writer/ExcelReadStrategyTest.java | 137 +++++++++++++++++++++
.../resources/excel/test_read_excel_formula.xlsx | Bin 0 -> 11411 bytes
.../src/test/resources/excel/test_read_formula.xls | Bin 0 -> 22016 bytes
4 files changed, 164 insertions(+), 3 deletions(-)
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
index b3d789be57..c4a982fc90 100644
---
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
@@ -37,9 +37,14 @@ import
org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorExc
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
+import org.apache.poi.ss.usermodel.CellValue;
+import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.DateUtil;
+import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.util.NumberToTextConverter;
+import org.apache.poi.xssf.usermodel.XSSFFormulaEvaluator;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import lombok.SneakyThrows;
@@ -88,15 +93,19 @@ public class ExcelReadStrategy extends AbstractReadStrategy
{
String currentFileName)
throws IOException {
Workbook workbook;
+ FormulaEvaluator formulaEvaluator;
if (currentFileName.endsWith(".xls")) {
workbook = new HSSFWorkbook(inputStream);
+ formulaEvaluator =
workbook.getCreationHelper().createFormulaEvaluator();
} else if (currentFileName.endsWith(".xlsx")) {
workbook = new XSSFWorkbook(inputStream);
+ formulaEvaluator = new XSSFFormulaEvaluator((XSSFWorkbook)
workbook);
} else {
throw new FileConnectorException(
CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
"Only support read excel file");
}
+ DataFormatter formatter = new DataFormatter();
Sheet sheet =
pluginConfig.hasPath(BaseSourceConfigOptions.SHEET_NAME.key())
? workbook.getSheet(
@@ -131,7 +140,11 @@ public class ExcelReadStrategy extends
AbstractReadStrategy {
cell == null
? null
: convert(
-
getCellValue(cell.getCellType(), cell),
+ getCellValue(
+
cell.getCellType(),
+ cell,
+
formulaEvaluator,
+ formatter),
fieldTypes[z - 1]));
}
if (isMergePartition) {
@@ -182,7 +195,11 @@ public class ExcelReadStrategy extends
AbstractReadStrategy {
"User must defined schema for json file type");
}
- private Object getCellValue(CellType cellType, Cell cell) {
+ private Object getCellValue(
+ CellType cellType,
+ Cell cell,
+ FormulaEvaluator formulaEvaluator,
+ DataFormatter formatter) {
switch (cellType) {
case STRING:
return cell.getStringCellValue();
@@ -192,11 +209,18 @@ public class ExcelReadStrategy extends
AbstractReadStrategy {
if (DateUtil.isCellDateFormatted(cell)) {
return cell.getLocalDateTimeCellValue();
}
- return cell.getNumericCellValue();
+ return formatter.formatCellValue(cell);
case BLANK:
return "";
case ERROR:
break;
+ case FORMULA:
+ CellValue evaluate = formulaEvaluator.evaluate(cell);
+ if (evaluate.getCellType().equals(CellType.NUMERIC)) {
+ return
NumberToTextConverter.toText(evaluate.getNumberValue());
+ } else {
+ return evaluate.formatAsString();
+ }
default:
throw new FileConnectorException(
CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE,
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ExcelReadStrategyTest.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ExcelReadStrategyTest.java
index 6edc55d56c..f445d932d7 100644
---
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ExcelReadStrategyTest.java
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ExcelReadStrategyTest.java
@@ -227,6 +227,143 @@ public class ExcelReadStrategyTest {
}
}
+ @Test
+ public void testExcelReadFormulaXls() throws IOException,
URISyntaxException {
+ URL excelFile =
ExcelReadStrategyTest.class.getResource("/excel/test_read_formula.xls");
+ URL conf =
ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.conf");
+ Assertions.assertNotNull(excelFile);
+ Assertions.assertNotNull(conf);
+ String excelFilePath = Paths.get(excelFile.toURI()).toString();
+ String confPath = Paths.get(conf.toURI()).toString();
+ Config pluginConfig = ConfigFactory.parseFile(new File(confPath));
+ ExcelReadStrategy excelReadStrategy = new ExcelReadStrategy();
+ LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT);
+ excelReadStrategy.setPluginConfig(pluginConfig);
+ excelReadStrategy.init(localConf);
+
+ List<String> fileNamesByPath =
excelReadStrategy.getFileNamesByPath(excelFilePath);
+ CatalogTable userDefinedCatalogTable =
CatalogTableUtil.buildWithConfig(pluginConfig);
+ excelReadStrategy.setCatalogTable(userDefinedCatalogTable);
+ TestCollector testCollector = new TestCollector();
+ excelReadStrategy.read(fileNamesByPath.get(0), "", testCollector);
+
+ for (SeaTunnelRow seaTunnelRow : testCollector.getRows()) {
+ Assertions.assertEquals(seaTunnelRow.getArity(), 14);
+ Assertions.assertEquals(seaTunnelRow.getField(0).getClass(),
Byte.class);
+ Assertions.assertEquals(seaTunnelRow.getField(1).getClass(),
Short.class);
+ Assertions.assertEquals(seaTunnelRow.getField(2).getClass(),
Integer.class);
+ Assertions.assertEquals(seaTunnelRow.getField(3).getClass(),
Long.class);
+ Assertions.assertEquals(seaTunnelRow.getField(4).getClass(),
String.class);
+ Assertions.assertEquals(seaTunnelRow.getField(5).getClass(),
Double.class);
+ Assertions.assertEquals(seaTunnelRow.getField(6).getClass(),
Float.class);
+ Assertions.assertEquals(seaTunnelRow.getField(7).getClass(),
BigDecimal.class);
+ Assertions.assertEquals(seaTunnelRow.getField(8).getClass(),
Boolean.class);
+ Assertions.assertEquals(seaTunnelRow.getField(9).getClass(),
LinkedHashMap.class);
+ Assertions.assertEquals(seaTunnelRow.getField(10).getClass(),
String[].class);
+ Assertions.assertEquals(seaTunnelRow.getField(11).getClass(),
LocalDate.class);
+ Assertions.assertEquals(seaTunnelRow.getField(12).getClass(),
LocalDateTime.class);
+ Assertions.assertEquals(seaTunnelRow.getField(13).getClass(),
LocalTime.class);
+ Assertions.assertEquals(seaTunnelRow.getField(0), (byte) 1);
+ Assertions.assertEquals(seaTunnelRow.getField(1), (short) 22);
+ Assertions.assertEquals(seaTunnelRow.getField(2), 333);
+ Assertions.assertEquals(seaTunnelRow.getField(3), 355L);
+ Assertions.assertEquals(seaTunnelRow.getField(4), "Cosmos");
+ Assertions.assertEquals(seaTunnelRow.getField(5), 5.555);
+ Assertions.assertEquals(seaTunnelRow.getField(6), (float) 6.666);
+ Assertions.assertEquals(seaTunnelRow.getField(7), new
BigDecimal("7.78"));
+ Assertions.assertEquals(seaTunnelRow.getField(8), Boolean.FALSE);
+ Assertions.assertEquals(
+ seaTunnelRow.getField(9),
+ new LinkedHashMap<String, String>() {
+ {
+ put("name", "Ivan");
+ put("age", "26");
+ }
+ });
+ Assertions.assertArrayEquals(
+ (String[]) seaTunnelRow.getField(10), new String[]
{"Ivan", "Dusayi"});
+ Assertions.assertEquals(
+ seaTunnelRow.getField(11),
+ DateUtils.parse("2024-01-31",
DateUtils.Formatter.YYYY_MM_DD));
+ Assertions.assertEquals(
+ seaTunnelRow.getField(12),
+ DateTimeUtils.parse(
+ "2024-01-31 16:00:48",
DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS));
+ Assertions.assertEquals(
+ seaTunnelRow.getField(13),
+ TimeUtils.parse("16:00:48", TimeUtils.Formatter.HH_MM_SS));
+ }
+ }
+
+ @Test
+ public void testExcelReadFormula() throws IOException, URISyntaxException {
+ URL excelFile =
+
ExcelReadStrategyTest.class.getResource("/excel/test_read_excel_formula.xlsx");
+ URL conf =
ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.conf");
+ Assertions.assertNotNull(excelFile);
+ Assertions.assertNotNull(conf);
+ String excelFilePath = Paths.get(excelFile.toURI()).toString();
+ String confPath = Paths.get(conf.toURI()).toString();
+ Config pluginConfig = ConfigFactory.parseFile(new File(confPath));
+ ExcelReadStrategy excelReadStrategy = new ExcelReadStrategy();
+ LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT);
+ excelReadStrategy.setPluginConfig(pluginConfig);
+ excelReadStrategy.init(localConf);
+
+ List<String> fileNamesByPath =
excelReadStrategy.getFileNamesByPath(excelFilePath);
+ CatalogTable userDefinedCatalogTable =
CatalogTableUtil.buildWithConfig(pluginConfig);
+ excelReadStrategy.setCatalogTable(userDefinedCatalogTable);
+ TestCollector testCollector = new TestCollector();
+ excelReadStrategy.read(fileNamesByPath.get(0), "", testCollector);
+
+ for (SeaTunnelRow seaTunnelRow : testCollector.getRows()) {
+ Assertions.assertEquals(seaTunnelRow.getArity(), 14);
+ Assertions.assertEquals(seaTunnelRow.getField(0).getClass(),
Byte.class);
+ Assertions.assertEquals(seaTunnelRow.getField(1).getClass(),
Short.class);
+ Assertions.assertEquals(seaTunnelRow.getField(2).getClass(),
Integer.class);
+ Assertions.assertEquals(seaTunnelRow.getField(3).getClass(),
Long.class);
+ Assertions.assertEquals(seaTunnelRow.getField(4).getClass(),
String.class);
+ Assertions.assertEquals(seaTunnelRow.getField(5).getClass(),
Double.class);
+ Assertions.assertEquals(seaTunnelRow.getField(6).getClass(),
Float.class);
+ Assertions.assertEquals(seaTunnelRow.getField(7).getClass(),
BigDecimal.class);
+ Assertions.assertEquals(seaTunnelRow.getField(8).getClass(),
Boolean.class);
+ Assertions.assertEquals(seaTunnelRow.getField(9).getClass(),
LinkedHashMap.class);
+ Assertions.assertEquals(seaTunnelRow.getField(10).getClass(),
String[].class);
+ Assertions.assertEquals(seaTunnelRow.getField(11).getClass(),
LocalDate.class);
+ Assertions.assertEquals(seaTunnelRow.getField(12).getClass(),
LocalDateTime.class);
+ Assertions.assertEquals(seaTunnelRow.getField(13).getClass(),
LocalTime.class);
+ Assertions.assertEquals(seaTunnelRow.getField(0), (byte) 1);
+ Assertions.assertEquals(seaTunnelRow.getField(1), (short) 22);
+ Assertions.assertEquals(seaTunnelRow.getField(2), 333);
+ Assertions.assertEquals(seaTunnelRow.getField(3), 355L);
+ Assertions.assertEquals(seaTunnelRow.getField(4), "Cosmos");
+ Assertions.assertEquals(seaTunnelRow.getField(5), 5.555);
+ Assertions.assertEquals(seaTunnelRow.getField(6), (float) 6.666);
+ Assertions.assertEquals(seaTunnelRow.getField(7), new
BigDecimal("7.78"));
+ Assertions.assertEquals(seaTunnelRow.getField(8), Boolean.FALSE);
+ Assertions.assertEquals(
+ seaTunnelRow.getField(9),
+ new LinkedHashMap<String, String>() {
+ {
+ put("name", "Ivan");
+ put("age", "26");
+ }
+ });
+ Assertions.assertArrayEquals(
+ (String[]) seaTunnelRow.getField(10), new String[]
{"Ivan", "Dusayi"});
+ Assertions.assertEquals(
+ seaTunnelRow.getField(11),
+ DateUtils.parse("2024-01-31",
DateUtils.Formatter.YYYY_MM_DD));
+ Assertions.assertEquals(
+ seaTunnelRow.getField(12),
+ DateTimeUtils.parse(
+ "2024-01-31 16:00:48",
DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS));
+ Assertions.assertEquals(
+ seaTunnelRow.getField(13),
+ TimeUtils.parse("16:00:48", TimeUtils.Formatter.HH_MM_SS));
+ }
+ }
+
@Getter
public static class TestCollector implements Collector<SeaTunnelRow> {
private final List<SeaTunnelRow> rows = new ArrayList<>();
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_excel_formula.xlsx
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_excel_formula.xlsx
new file mode 100644
index 0000000000..8190bd22c0
Binary files /dev/null and
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_excel_formula.xlsx
differ
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_formula.xls
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_formula.xls
new file mode 100644
index 0000000000..1ef5e34261
Binary files /dev/null and
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_formula.xls
differ