This is an automated email from the ASF dual-hosted git repository.

fanjia pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new 00c5aed1af [BugFix][Excel] Fix read formulas/number cell value of 
excel (#8316)
00c5aed1af is described below

commit 00c5aed1afce0ae64fa27f473d8f88cbf5829543
Author: Tu-maimes <[email protected]>
AuthorDate: Mon Dec 23 17:45:46 2024 +0800

    [BugFix][Excel] Fix read formulas/number cell value of excel (#8316)
    
    Co-authored-by: Tu-maimes <[email protected]>
---
 .../file/source/reader/ExcelReadStrategy.java      |  30 ++++-
 .../file/writer/ExcelReadStrategyTest.java         | 137 +++++++++++++++++++++
 .../resources/excel/test_read_excel_formula.xlsx   | Bin 0 -> 11411 bytes
 .../src/test/resources/excel/test_read_formula.xls | Bin 0 -> 22016 bytes
 4 files changed, 164 insertions(+), 3 deletions(-)

diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
index b3d789be57..c4a982fc90 100644
--- 
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
+++ 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
@@ -37,9 +37,14 @@ import 
org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorExc
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.CellType;
+import org.apache.poi.ss.usermodel.CellValue;
+import org.apache.poi.ss.usermodel.DataFormatter;
 import org.apache.poi.ss.usermodel.DateUtil;
+import org.apache.poi.ss.usermodel.FormulaEvaluator;
 import org.apache.poi.ss.usermodel.Sheet;
 import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.util.NumberToTextConverter;
+import org.apache.poi.xssf.usermodel.XSSFFormulaEvaluator;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 
 import lombok.SneakyThrows;
@@ -88,15 +93,19 @@ public class ExcelReadStrategy extends AbstractReadStrategy 
{
             String currentFileName)
             throws IOException {
         Workbook workbook;
+        FormulaEvaluator formulaEvaluator;
         if (currentFileName.endsWith(".xls")) {
             workbook = new HSSFWorkbook(inputStream);
+            formulaEvaluator = 
workbook.getCreationHelper().createFormulaEvaluator();
         } else if (currentFileName.endsWith(".xlsx")) {
             workbook = new XSSFWorkbook(inputStream);
+            formulaEvaluator = new XSSFFormulaEvaluator((XSSFWorkbook) 
workbook);
         } else {
             throw new FileConnectorException(
                     CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
                     "Only support read excel file");
         }
+        DataFormatter formatter = new DataFormatter();
         Sheet sheet =
                 pluginConfig.hasPath(BaseSourceConfigOptions.SHEET_NAME.key())
                         ? workbook.getSheet(
@@ -131,7 +140,11 @@ public class ExcelReadStrategy extends 
AbstractReadStrategy {
                                         cell == null
                                                 ? null
                                                 : convert(
-                                                        
getCellValue(cell.getCellType(), cell),
+                                                        getCellValue(
+                                                                
cell.getCellType(),
+                                                                cell,
+                                                                
formulaEvaluator,
+                                                                formatter),
                                                         fieldTypes[z - 1]));
                             }
                             if (isMergePartition) {
@@ -182,7 +195,11 @@ public class ExcelReadStrategy extends 
AbstractReadStrategy {
                 "User must defined schema for json file type");
     }
 
-    private Object getCellValue(CellType cellType, Cell cell) {
+    private Object getCellValue(
+            CellType cellType,
+            Cell cell,
+            FormulaEvaluator formulaEvaluator,
+            DataFormatter formatter) {
         switch (cellType) {
             case STRING:
                 return cell.getStringCellValue();
@@ -192,11 +209,18 @@ public class ExcelReadStrategy extends 
AbstractReadStrategy {
                 if (DateUtil.isCellDateFormatted(cell)) {
                     return cell.getLocalDateTimeCellValue();
                 }
-                return cell.getNumericCellValue();
+                return formatter.formatCellValue(cell);
             case BLANK:
                 return "";
             case ERROR:
                 break;
+            case FORMULA:
+                CellValue evaluate = formulaEvaluator.evaluate(cell);
+                if (evaluate.getCellType().equals(CellType.NUMERIC)) {
+                    return 
NumberToTextConverter.toText(evaluate.getNumberValue());
+                } else {
+                    return evaluate.formatAsString();
+                }
             default:
                 throw new FileConnectorException(
                         CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE,
diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ExcelReadStrategyTest.java
 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ExcelReadStrategyTest.java
index 6edc55d56c..f445d932d7 100644
--- 
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ExcelReadStrategyTest.java
+++ 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ExcelReadStrategyTest.java
@@ -227,6 +227,143 @@ public class ExcelReadStrategyTest {
         }
     }
 
+    @Test
+    public void testExcelReadFormulaXls() throws IOException, 
URISyntaxException {
+        URL excelFile = 
ExcelReadStrategyTest.class.getResource("/excel/test_read_formula.xls");
+        URL conf = 
ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.conf");
+        Assertions.assertNotNull(excelFile);
+        Assertions.assertNotNull(conf);
+        String excelFilePath = Paths.get(excelFile.toURI()).toString();
+        String confPath = Paths.get(conf.toURI()).toString();
+        Config pluginConfig = ConfigFactory.parseFile(new File(confPath));
+        ExcelReadStrategy excelReadStrategy = new ExcelReadStrategy();
+        LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT);
+        excelReadStrategy.setPluginConfig(pluginConfig);
+        excelReadStrategy.init(localConf);
+
+        List<String> fileNamesByPath = 
excelReadStrategy.getFileNamesByPath(excelFilePath);
+        CatalogTable userDefinedCatalogTable = 
CatalogTableUtil.buildWithConfig(pluginConfig);
+        excelReadStrategy.setCatalogTable(userDefinedCatalogTable);
+        TestCollector testCollector = new TestCollector();
+        excelReadStrategy.read(fileNamesByPath.get(0), "", testCollector);
+
+        for (SeaTunnelRow seaTunnelRow : testCollector.getRows()) {
+            Assertions.assertEquals(seaTunnelRow.getArity(), 14);
+            Assertions.assertEquals(seaTunnelRow.getField(0).getClass(), 
Byte.class);
+            Assertions.assertEquals(seaTunnelRow.getField(1).getClass(), 
Short.class);
+            Assertions.assertEquals(seaTunnelRow.getField(2).getClass(), 
Integer.class);
+            Assertions.assertEquals(seaTunnelRow.getField(3).getClass(), 
Long.class);
+            Assertions.assertEquals(seaTunnelRow.getField(4).getClass(), 
String.class);
+            Assertions.assertEquals(seaTunnelRow.getField(5).getClass(), 
Double.class);
+            Assertions.assertEquals(seaTunnelRow.getField(6).getClass(), 
Float.class);
+            Assertions.assertEquals(seaTunnelRow.getField(7).getClass(), 
BigDecimal.class);
+            Assertions.assertEquals(seaTunnelRow.getField(8).getClass(), 
Boolean.class);
+            Assertions.assertEquals(seaTunnelRow.getField(9).getClass(), 
LinkedHashMap.class);
+            Assertions.assertEquals(seaTunnelRow.getField(10).getClass(), 
String[].class);
+            Assertions.assertEquals(seaTunnelRow.getField(11).getClass(), 
LocalDate.class);
+            Assertions.assertEquals(seaTunnelRow.getField(12).getClass(), 
LocalDateTime.class);
+            Assertions.assertEquals(seaTunnelRow.getField(13).getClass(), 
LocalTime.class);
+            Assertions.assertEquals(seaTunnelRow.getField(0), (byte) 1);
+            Assertions.assertEquals(seaTunnelRow.getField(1), (short) 22);
+            Assertions.assertEquals(seaTunnelRow.getField(2), 333);
+            Assertions.assertEquals(seaTunnelRow.getField(3), 355L);
+            Assertions.assertEquals(seaTunnelRow.getField(4), "Cosmos");
+            Assertions.assertEquals(seaTunnelRow.getField(5), 5.555);
+            Assertions.assertEquals(seaTunnelRow.getField(6), (float) 6.666);
+            Assertions.assertEquals(seaTunnelRow.getField(7), new 
BigDecimal("7.78"));
+            Assertions.assertEquals(seaTunnelRow.getField(8), Boolean.FALSE);
+            Assertions.assertEquals(
+                    seaTunnelRow.getField(9),
+                    new LinkedHashMap<String, String>() {
+                        {
+                            put("name", "Ivan");
+                            put("age", "26");
+                        }
+                    });
+            Assertions.assertArrayEquals(
+                    (String[]) seaTunnelRow.getField(10), new String[] 
{"Ivan", "Dusayi"});
+            Assertions.assertEquals(
+                    seaTunnelRow.getField(11),
+                    DateUtils.parse("2024-01-31", 
DateUtils.Formatter.YYYY_MM_DD));
+            Assertions.assertEquals(
+                    seaTunnelRow.getField(12),
+                    DateTimeUtils.parse(
+                            "2024-01-31 16:00:48", 
DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS));
+            Assertions.assertEquals(
+                    seaTunnelRow.getField(13),
+                    TimeUtils.parse("16:00:48", TimeUtils.Formatter.HH_MM_SS));
+        }
+    }
+
+    @Test
+    public void testExcelReadFormula() throws IOException, URISyntaxException {
+        URL excelFile =
+                
ExcelReadStrategyTest.class.getResource("/excel/test_read_excel_formula.xlsx");
+        URL conf = 
ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.conf");
+        Assertions.assertNotNull(excelFile);
+        Assertions.assertNotNull(conf);
+        String excelFilePath = Paths.get(excelFile.toURI()).toString();
+        String confPath = Paths.get(conf.toURI()).toString();
+        Config pluginConfig = ConfigFactory.parseFile(new File(confPath));
+        ExcelReadStrategy excelReadStrategy = new ExcelReadStrategy();
+        LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT);
+        excelReadStrategy.setPluginConfig(pluginConfig);
+        excelReadStrategy.init(localConf);
+
+        List<String> fileNamesByPath = 
excelReadStrategy.getFileNamesByPath(excelFilePath);
+        CatalogTable userDefinedCatalogTable = 
CatalogTableUtil.buildWithConfig(pluginConfig);
+        excelReadStrategy.setCatalogTable(userDefinedCatalogTable);
+        TestCollector testCollector = new TestCollector();
+        excelReadStrategy.read(fileNamesByPath.get(0), "", testCollector);
+
+        for (SeaTunnelRow seaTunnelRow : testCollector.getRows()) {
+            Assertions.assertEquals(seaTunnelRow.getArity(), 14);
+            Assertions.assertEquals(seaTunnelRow.getField(0).getClass(), 
Byte.class);
+            Assertions.assertEquals(seaTunnelRow.getField(1).getClass(), 
Short.class);
+            Assertions.assertEquals(seaTunnelRow.getField(2).getClass(), 
Integer.class);
+            Assertions.assertEquals(seaTunnelRow.getField(3).getClass(), 
Long.class);
+            Assertions.assertEquals(seaTunnelRow.getField(4).getClass(), 
String.class);
+            Assertions.assertEquals(seaTunnelRow.getField(5).getClass(), 
Double.class);
+            Assertions.assertEquals(seaTunnelRow.getField(6).getClass(), 
Float.class);
+            Assertions.assertEquals(seaTunnelRow.getField(7).getClass(), 
BigDecimal.class);
+            Assertions.assertEquals(seaTunnelRow.getField(8).getClass(), 
Boolean.class);
+            Assertions.assertEquals(seaTunnelRow.getField(9).getClass(), 
LinkedHashMap.class);
+            Assertions.assertEquals(seaTunnelRow.getField(10).getClass(), 
String[].class);
+            Assertions.assertEquals(seaTunnelRow.getField(11).getClass(), 
LocalDate.class);
+            Assertions.assertEquals(seaTunnelRow.getField(12).getClass(), 
LocalDateTime.class);
+            Assertions.assertEquals(seaTunnelRow.getField(13).getClass(), 
LocalTime.class);
+            Assertions.assertEquals(seaTunnelRow.getField(0), (byte) 1);
+            Assertions.assertEquals(seaTunnelRow.getField(1), (short) 22);
+            Assertions.assertEquals(seaTunnelRow.getField(2), 333);
+            Assertions.assertEquals(seaTunnelRow.getField(3), 355L);
+            Assertions.assertEquals(seaTunnelRow.getField(4), "Cosmos");
+            Assertions.assertEquals(seaTunnelRow.getField(5), 5.555);
+            Assertions.assertEquals(seaTunnelRow.getField(6), (float) 6.666);
+            Assertions.assertEquals(seaTunnelRow.getField(7), new 
BigDecimal("7.78"));
+            Assertions.assertEquals(seaTunnelRow.getField(8), Boolean.FALSE);
+            Assertions.assertEquals(
+                    seaTunnelRow.getField(9),
+                    new LinkedHashMap<String, String>() {
+                        {
+                            put("name", "Ivan");
+                            put("age", "26");
+                        }
+                    });
+            Assertions.assertArrayEquals(
+                    (String[]) seaTunnelRow.getField(10), new String[] 
{"Ivan", "Dusayi"});
+            Assertions.assertEquals(
+                    seaTunnelRow.getField(11),
+                    DateUtils.parse("2024-01-31", 
DateUtils.Formatter.YYYY_MM_DD));
+            Assertions.assertEquals(
+                    seaTunnelRow.getField(12),
+                    DateTimeUtils.parse(
+                            "2024-01-31 16:00:48", 
DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS));
+            Assertions.assertEquals(
+                    seaTunnelRow.getField(13),
+                    TimeUtils.parse("16:00:48", TimeUtils.Formatter.HH_MM_SS));
+        }
+    }
+
     @Getter
     public static class TestCollector implements Collector<SeaTunnelRow> {
         private final List<SeaTunnelRow> rows = new ArrayList<>();
diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_excel_formula.xlsx
 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_excel_formula.xlsx
new file mode 100644
index 0000000000..8190bd22c0
Binary files /dev/null and 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_excel_formula.xlsx
 differ
diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_formula.xls
 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_formula.xls
new file mode 100644
index 0000000000..1ef5e34261
Binary files /dev/null and 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/excel/test_read_formula.xls
 differ

Reply via email to