This is an automated email from the ASF dual-hosted git repository.
dstiggy pushed a commit to branch support/nifi-1.x
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/support/nifi-1.x by this push:
new f4ee11975e NIFI-13744 Corrected Excel Reader Cell Type Inferencing
f4ee11975e is described below
commit f4ee11975eb051894dd75f99efde931f023fb812
Author: exceptionfactory <[email protected]>
AuthorDate: Fri Oct 11 10:07:20 2024 -0500
NIFI-13744 Corrected Excel Reader Cell Type Inferencing
- Added Cell Field Type Reader with Timestamp detection
This closes #9397
---
.../org/apache/nifi/excel/CellFieldTypeReader.java | 45 ++++
.../nifi/excel/ExcelHeaderSchemaStrategy.java | 19 +-
.../java/org/apache/nifi/excel/ExcelReader.java | 2 +-
.../org/apache/nifi/excel/ExcelRecordReader.java | 45 +++-
.../apache/nifi/excel/ExcelSchemaInference.java | 19 +-
.../nifi/excel/StandardCellFieldTypeReader.java | 154 ++++++++++++
.../nifi/excel/TestExcelHeaderSchemaStrategy.java | 17 +-
.../nifi/excel/TestExcelSchemaInference.java | 198 +++++++++-------
.../excel/TestStandardCellFieldTypeReader.java | 257 +++++++++++++++++++++
9 files changed, 626 insertions(+), 130 deletions(-)
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/CellFieldTypeReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/CellFieldTypeReader.java
new file mode 100644
index 0000000000..7e7bdf7e3a
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/CellFieldTypeReader.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.excel;
+
+import org.apache.nifi.schema.inference.FieldTypeInference;
+import org.apache.nifi.serialization.record.DataType;
+import org.apache.poi.ss.usermodel.Cell;
+
+import java.util.Map;
+
+/**
+ * Shared abstraction for determining Record Field Data Type from Spreadsheet Cell Types
+ */
+interface CellFieldTypeReader {
+ /**
+ * Infer Cell Field Type and update Map of Field Type information
+ *
+ * @param cell Spreadsheet Cell can be null
+ * @param fieldName Cell field name for tracking in Field Types required
+ * @param fieldTypes Map of Field Name to Field Type Inference information required
+ */
+ void inferCellFieldType(Cell cell, String fieldName, Map<String,
FieldTypeInference> fieldTypes);
+
+ /**
+ * Get Record Data Type from Spreadsheet Cell
+ *
+ * @param cell Spreadsheet Cell can be null
+ * @return Record Data Type or null
+ */
+ DataType getCellDataType(Cell cell);
+}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java
index b685da3824..e77ec9803f 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java
@@ -26,21 +26,17 @@ import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.schema.inference.FieldTypeInference;
import org.apache.nifi.schema.inference.TimeValueInference;
import org.apache.nifi.serialization.SimpleRecordSchema;
-import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordSchema;
-import org.apache.nifi.util.SchemaInferenceUtil;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
-import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@@ -55,18 +51,18 @@ public class ExcelHeaderSchemaStrategy implements SchemaAccessStrategy {
private final PropertyContext context;
private final ComponentLog logger;
- private final TimeValueInference timeValueInference;
+ private final CellFieldTypeReader cellFieldTypeReader;
private final DataFormatter dataFormatter;
- public ExcelHeaderSchemaStrategy(PropertyContext context, ComponentLog
logger, TimeValueInference timeValueInference, Locale locale) {
+ public ExcelHeaderSchemaStrategy(PropertyContext context, ComponentLog
logger, TimeValueInference timeValueInference) {
this.context = context;
this.logger = logger;
- this.timeValueInference = timeValueInference;
- this.dataFormatter = locale == null ? new DataFormatter() : new
DataFormatter(locale);
+ this.cellFieldTypeReader = new
StandardCellFieldTypeReader(timeValueInference);
+ this.dataFormatter = new DataFormatter();
}
@Override
- public RecordSchema getSchema(Map<String, String> variables, InputStream
contentStream, RecordSchema readSchema) throws SchemaNotFoundException,
IOException {
+ public RecordSchema getSchema(Map<String, String> variables, InputStream
contentStream, RecordSchema readSchema) throws SchemaNotFoundException {
if (this.context == null) {
throw new SchemaNotFoundException("Schema Access Strategy intended
only for validation purposes and cannot obtain schema");
}
@@ -140,10 +136,7 @@ public class ExcelHeaderSchemaStrategy implements SchemaAccessStrategy {
.forEach(index -> {
final Cell cell = row.getCell(index);
final String fieldName = fieldNames.get(index);
- final FieldTypeInference typeInference =
typeMap.computeIfAbsent(fieldName, key -> new FieldTypeInference());
- final String formattedCellValue =
dataFormatter.formatCellValue(cell);
- final DataType dataType =
SchemaInferenceUtil.getDataType(formattedCellValue, timeValueInference);
- typeInference.addPossibleDataType(dataType);
+ cellFieldTypeReader.inferCellFieldType(cell,
fieldName, typeMap);
});
}
}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
index 73e097ad32..7a8fabbe56 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
@@ -187,7 +187,7 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
@Override
protected SchemaAccessStrategy getSchemaAccessStrategy(final String
allowableValue, final SchemaRegistry schemaRegistry, final PropertyContext
context) {
if
(allowableValue.equalsIgnoreCase(ExcelHeaderSchemaStrategy.USE_STARTING_ROW.getValue()))
{
- return new ExcelHeaderSchemaStrategy(context, getLogger(), new
TimeValueInference(dateFormat, timeFormat, timestampFormat), null);
+ return new ExcelHeaderSchemaStrategy(context, getLogger(), new
TimeValueInference(dateFormat, timeFormat, timestampFormat));
} else if
(SchemaInferenceUtil.INFER_SCHEMA.getValue().equals(allowableValue)) {
final RecordSourceFactory<Row> sourceFactory = (variables, in) ->
new ExcelRecordSource(in, context, variables, getLogger());
final SchemaInferenceEngine<Row> inference = new
ExcelSchemaInference(new TimeValueInference(dateFormat, timeFormat,
timestampFormat));
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
index c18a31d1b2..65d4a89e65 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
@@ -26,6 +26,7 @@ import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.util.DataTypeUtils;
import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
@@ -143,22 +144,50 @@ public class ExcelRecordReader implements RecordReader {
return currentRowValues;
}
- private static Object getCellValue(Cell cell) {
- if (cell != null) {
- switch (cell.getCellType()) {
+ private static Object getCellValue(final Cell cell) {
+ final Object cellValue;
+
+ if (cell == null) {
+ cellValue = null;
+ } else {
+ final CellType cellType = cell.getCellType();
+ switch (cellType) {
case _NONE:
case BLANK:
case ERROR:
- case FORMULA:
case STRING:
- return cell.getStringCellValue();
+ cellValue = cell.getStringCellValue();
+ break;
case NUMERIC:
- return DateUtil.isCellDateFormatted(cell) ?
cell.getDateCellValue() : cell.getNumericCellValue();
+ cellValue = DateUtil.isCellDateFormatted(cell) ?
cell.getDateCellValue() : cell.getNumericCellValue();
+ break;
case BOOLEAN:
- return cell.getBooleanCellValue();
+ cellValue = cell.getBooleanCellValue();
+ break;
+ case FORMULA:
+ cellValue = getFormulaCellValue(cell);
+ break;
+ default:
+ return null;
}
}
- return null;
+
+ return cellValue;
+ }
+
+ private static Object getFormulaCellValue(final Cell cell) {
+ final CellType formulaResultType = cell.getCachedFormulaResultType();
+ switch (formulaResultType) {
+ case BOOLEAN:
+ return cell.getBooleanCellValue();
+ case STRING:
+ case ERROR:
+ return cell.getStringCellValue();
+ case NUMERIC:
+ return DateUtil.isCellDateFormatted(cell) ?
cell.getDateCellValue() : cell.getNumericCellValue();
+ default:
+ return null;
+ }
}
private Object convert(final Object value, final DataType dataType, final
String fieldName) {
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelSchemaInference.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelSchemaInference.java
index f082587cca..679ec38048 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelSchemaInference.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelSchemaInference.java
@@ -21,33 +21,23 @@ import org.apache.nifi.schema.inference.RecordSource;
import org.apache.nifi.schema.inference.SchemaInferenceEngine;
import org.apache.nifi.schema.inference.TimeValueInference;
import org.apache.nifi.serialization.SimpleRecordSchema;
-import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordSchema;
-import org.apache.nifi.util.SchemaInferenceUtil;
import org.apache.poi.ss.usermodel.Cell;
-import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class ExcelSchemaInference implements SchemaInferenceEngine<Row> {
- private final TimeValueInference timeValueInference;
- private final DataFormatter dataFormatter;
+ private final CellFieldTypeReader cellFieldTypeReader;
public ExcelSchemaInference(TimeValueInference timeValueInference) {
- this(timeValueInference, null);
- }
-
- public ExcelSchemaInference(TimeValueInference timeValueInference, Locale
locale) {
- this.timeValueInference = timeValueInference;
- this.dataFormatter = locale == null ? new DataFormatter() : new
DataFormatter(locale);
+ this.cellFieldTypeReader = new
StandardCellFieldTypeReader(timeValueInference);
}
@Override
@@ -66,10 +56,7 @@ public class ExcelSchemaInference implements SchemaInferenceEngine<Row> {
.forEach(index -> {
final Cell cell = row.getCell(index);
final String fieldName = ExcelUtils.FIELD_NAME_PREFIX
+ index;
- final FieldTypeInference typeInference =
typeMap.computeIfAbsent(fieldName, key -> new FieldTypeInference());
- final String formattedCellValue =
dataFormatter.formatCellValue(cell);
- final DataType dataType =
SchemaInferenceUtil.getDataType(formattedCellValue, timeValueInference);
- typeInference.addPossibleDataType(dataType);
+ cellFieldTypeReader.inferCellFieldType(cell,
fieldName, typeMap);
});
}
}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/StandardCellFieldTypeReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/StandardCellFieldTypeReader.java
new file mode 100644
index 0000000000..3739781f76
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/StandardCellFieldTypeReader.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.excel;
+
+import org.apache.nifi.schema.inference.FieldTypeInference;
+import org.apache.nifi.schema.inference.TimeValueInference;
+import org.apache.nifi.serialization.record.DataType;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.CellType;
+import org.apache.poi.ss.usermodel.DateUtil;
+
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * Standard implementation of Cell Field Type Reader uses Cell Type and Cell Value information with inference based on Timestamp formats
+ */
+class StandardCellFieldTypeReader implements CellFieldTypeReader {
+ private final TimeValueInference timeValueInference;
+
+ /**
+ * Standard Cell Field Type Reader constructor with Time Value Inference for handling STRING Cell Types that may contain values with Timestamps
+ *
+ * @param timeValueInference Time Value Inference required for STRING Cell Type evaluation
+ */
+ StandardCellFieldTypeReader(final TimeValueInference timeValueInference) {
+ this.timeValueInference = Objects.requireNonNull(timeValueInference,
"Time Value Inference required");
+ }
+
+ /**
+ * Infer Cell Field Type and append possible Data Types to mapped Field Type Inference information
+ *
+ * @param cell Spreadsheet Cell can be null
+ * @param fieldName Cell field name for tracking in Field Types required
+ * @param fieldTypes Map of Field Name to Field Type Inference information required
+ */
+ @Override
+ public void inferCellFieldType(final Cell cell, final String fieldName,
final Map<String, FieldTypeInference> fieldTypes) {
+ Objects.requireNonNull(fieldName, "Field Name required");
+ Objects.requireNonNull(fieldTypes, "Field Types required");
+
+ final FieldTypeInference fieldTypeInference =
fieldTypes.computeIfAbsent(fieldName, key -> new FieldTypeInference());
+ final DataType dataType = getCellDataType(cell);
+ fieldTypeInference.addPossibleDataType(dataType);
+ }
+
+ /**
+ * Get Record Data Type from Spreadsheet Cell Type and additional resolution of NUMERIC and STRING types
+ *
+ * @param cell Spreadsheet Cell can be null
+ * @return Record Data Type or null
+ */
+ @Override
+ public DataType getCellDataType(final Cell cell) {
+ if (cell == null) {
+ return null;
+ }
+
+ final CellType cellType = cell.getCellType();
+
+ final DataType dataType;
+
+ if (CellType.NUMERIC == cellType) {
+ // Date Formatting check limited to NUMERIC Cell Types
+ final double numericCellValue = cell.getNumericCellValue();
+ if (DateUtil.isCellDateFormatted(cell)) {
+ dataType = getDateTimeDataType(numericCellValue);
+ } else {
+ if (isWholeNumber(numericCellValue)) {
+ dataType = RecordFieldType.LONG.getDataType();
+ } else {
+ // Default to DOUBLE for NUMERIC values following cell.getNumericCellValue()
+ dataType = RecordFieldType.DOUBLE.getDataType();
+ }
+ }
+ } else if (CellType.BOOLEAN == cellType) {
+ dataType = RecordFieldType.BOOLEAN.getDataType();
+ } else if (CellType.STRING == cellType) {
+ final String cellValue = cell.getStringCellValue();
+ // Attempt Time Value inference for STRING cell values
+ final Optional<DataType> timeDataType =
timeValueInference.getDataType(cellValue);
+ dataType =
timeDataType.orElse(RecordFieldType.STRING.getDataType());
+ } else if (CellType.FORMULA == cellType) {
+ dataType = getFormulaResultDataType(cell);
+ } else {
+ // Default to null for known and unknown Cell Types
+ dataType = null;
+ }
+
+ return dataType;
+ }
+
+ private DataType getDateTimeDataType(final double numericCellValue) {
+ final DataType dataType;
+
+ if (isWholeNumber(numericCellValue)) {
+ // Numbers without decimal fractions indicate Dates without Times
+ dataType = RecordFieldType.DATE.getDataType();
+ } else if (numericCellValue < 1) {
+ // Decimal fractions indicate Times without Dates
+ dataType = RecordFieldType.TIME.getDataType();
+ } else {
+ dataType = RecordFieldType.TIMESTAMP.getDataType();
+ }
+
+ return dataType;
+ }
+
+ private DataType getFormulaResultDataType(final Cell cell) {
+ final DataType dataType;
+
+ final CellType formulaResultType = cell.getCachedFormulaResultType();
+ if (CellType.BOOLEAN == formulaResultType) {
+ dataType = RecordFieldType.BOOLEAN.getDataType();
+ } else if (CellType.STRING == formulaResultType) {
+ dataType = RecordFieldType.STRING.getDataType();
+ } else if (CellType.NUMERIC == formulaResultType) {
+ // Date Formatting check limited to NUMERIC Cell Types without Conditional Formatting Evaluator
+ if (DateUtil.isCellDateFormatted(cell)) {
+ final double numericCellValue = cell.getNumericCellValue();
+ dataType = getDateTimeDataType(numericCellValue);
+ } else {
+ // Default to DOUBLE for NUMERIC values following cell.getNumericCellValue()
+ dataType = RecordFieldType.DOUBLE.getDataType();
+ }
+ } else {
+ // Default to null for known and unknown Formula Result Cell Types
+ dataType = null;
+ }
+
+ return dataType;
+ }
+
+ private boolean isWholeNumber(final double numericCellValue) {
+ final long roundedCellValue = (long) numericCellValue;
+ return roundedCellValue == numericCellValue;
+ }
+}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelHeaderSchemaStrategy.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelHeaderSchemaStrategy.java
index 80535eee31..0513f38547 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelHeaderSchemaStrategy.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelHeaderSchemaStrategy.java
@@ -48,6 +48,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
@ExtendWith(MockitoExtension.class)
public class TestExcelHeaderSchemaStrategy {
private static final TimeValueInference TIME_VALUE_INFERENCE = new
TimeValueInference("MM/dd/yyyy", "HH:mm:ss.SSS", "yyyy/MM/dd/ HH:mm");
+
@Mock
ComponentLog logger;
@@ -57,7 +58,7 @@ public class TestExcelHeaderSchemaStrategy {
final ByteArrayOutputStream outputStream =
getSingleSheetWorkbook(data);
final Map<PropertyDescriptor, String> properties = new HashMap<>();
final ConfigurationContext context = new
MockConfigurationContext(properties, null, null);
- final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, null, null);
+ final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE);
try (final InputStream inputStream = new
ByteArrayInputStream(outputStream.toByteArray())) {
SchemaNotFoundException schemaNotFoundException =
assertThrows(SchemaNotFoundException.class, () ->
schemaStrategy.getSchema(null, inputStream, null));
@@ -71,7 +72,7 @@ public class TestExcelHeaderSchemaStrategy {
final ByteArrayOutputStream outputStream =
getSingleSheetWorkbook(data);
final Map<PropertyDescriptor, String> properties = new HashMap<>();
final ConfigurationContext context = new
MockConfigurationContext(properties, null, null);
- final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE, null);
+ final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE);
try (final InputStream inputStream = new
ByteArrayInputStream(outputStream.toByteArray())) {
RecordSchema schema = schemaStrategy.getSchema(null, inputStream,
null);
@@ -86,7 +87,7 @@ public class TestExcelHeaderSchemaStrategy {
final ByteArrayOutputStream outputStream =
getSingleSheetWorkbook(data);
final Map<PropertyDescriptor, String> properties = new HashMap<>();
final ConfigurationContext context = new
MockConfigurationContext(properties, null, null);
- final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE, null);
+ final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE);
try (final InputStream inputStream = new
ByteArrayInputStream(outputStream.toByteArray())) {
SchemaNotFoundException schemaNotFoundException =
assertThrows(SchemaNotFoundException.class, () ->
schemaStrategy.getSchema(null, inputStream, null));
@@ -100,8 +101,7 @@ public class TestExcelHeaderSchemaStrategy {
final ByteArrayOutputStream outputStream =
getSingleSheetWorkbook(data);
final Map<PropertyDescriptor, String> properties = new HashMap<>();
final ConfigurationContext context = new
MockConfigurationContext(properties, null, null);
- final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE, null);
- assertTrue(data.length - 1 <
ExcelHeaderSchemaStrategy.NUM_ROWS_TO_DETERMINE_TYPES);
+ final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE);
try (final InputStream inputStream = new
ByteArrayInputStream(outputStream.toByteArray())) {
assertDoesNotThrow(() -> schemaStrategy.getSchema(null,
inputStream, null));
@@ -117,7 +117,7 @@ public class TestExcelHeaderSchemaStrategy {
final ByteArrayOutputStream outputStream =
getSingleSheetWorkbook(data);
final Map<PropertyDescriptor, String> properties = new HashMap<>();
final ConfigurationContext context = new
MockConfigurationContext(properties, null, null);
- final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE, null);
+ final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE);
try (final InputStream inputStream = new
ByteArrayInputStream(outputStream.toByteArray())) {
assertDoesNotThrow(() -> schemaStrategy.getSchema(null,
inputStream, null));
@@ -129,12 +129,11 @@ public class TestExcelHeaderSchemaStrategy {
Object[][] data = {{"ID", "First", "Middle"}, {1, "One", "O"}, {2,
"Two", "T"}, {3, "Three", "T"},
{4, "Four", "F"}, {5, "Five", "F"}, {6, "Six", "S"}, {7,
"Seven", "S"}, {8, "Eight", "E"},
{9, "Nine", "N"}, {10, "Ten", "T"}, {11, "Eleven", "E"}};
- assertTrue(data.length - 1 >
ExcelHeaderSchemaStrategy.NUM_ROWS_TO_DETERMINE_TYPES);
final ByteArrayOutputStream outputStream =
getSingleSheetWorkbook(data);
final Map<PropertyDescriptor, String> properties = new HashMap<>();
final ConfigurationContext context = new
MockConfigurationContext(properties, null, null);
- final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE, null);
+ final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE);
try (final InputStream inputStream = new
ByteArrayInputStream(outputStream.toByteArray())) {
assertDoesNotThrow(() -> schemaStrategy.getSchema(null,
inputStream, null));
@@ -147,7 +146,7 @@ public class TestExcelHeaderSchemaStrategy {
final ByteArrayOutputStream outputStream =
getSingleSheetWorkbook(data);
final Map<PropertyDescriptor, String> properties = new HashMap<>();
final ConfigurationContext context = new
MockConfigurationContext(properties, null, null);
- final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE, null);
+ final ExcelHeaderSchemaStrategy schemaStrategy = new
ExcelHeaderSchemaStrategy(context, logger, TIME_VALUE_INFERENCE);
try (final InputStream inputStream = new
ByteArrayInputStream(outputStream.toByteArray())) {
SchemaNotFoundException schemaNotFoundException =
assertThrows(SchemaNotFoundException.class, () ->
schemaStrategy.getSchema(null, inputStream, null));
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelSchemaInference.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelSchemaInference.java
index f2f7d45866..8565f95986 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelSchemaInference.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelSchemaInference.java
@@ -16,33 +16,34 @@
*/
package org.apache.nifi.excel;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.schema.inference.InferSchemaAccessStrategy;
import org.apache.nifi.schema.inference.TimeValueInference;
+import org.apache.nifi.serialization.record.DataType;
+import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.util.MockConfigurationContext;
+import org.apache.poi.ss.usermodel.CellType;
+import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.Arguments;
-import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.Mock;
-import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.stream.Stream;
-
import static org.junit.jupiter.api.Assertions.assertEquals;
@ExtendWith(MockitoExtension.class)
@@ -51,73 +52,55 @@ public class TestExcelSchemaInference {
private static final String EXPECTED_SECOND_FIELD_NAME =
ExcelUtils.FIELD_NAME_PREFIX + "1";
private static final String EXPECTED_THIRD_FIELD_NAME =
ExcelUtils.FIELD_NAME_PREFIX + "2";
private static final String EXPECTED_FOURTH_FIELD_NAME =
ExcelUtils.FIELD_NAME_PREFIX + "3";
- private final TimeValueInference timestampInference = new
TimeValueInference("MM/dd/yyyy", "HH:mm:ss.SSS", "yyyy/MM/dd/ HH:mm");
-
- @Mock
- ComponentLog logger;
- @ParameterizedTest
- @MethodSource("getLocales")
- public void testInferenceAgainstDifferentLocales(Locale locale) throws
IOException {
- final Map<PropertyDescriptor, String> properties = new HashMap<>();
- new ExcelReader().getSupportedPropertyDescriptors().forEach(prop ->
properties.put(prop, prop.getDefaultValue()));
- final PropertyContext context = new
MockConfigurationContext(properties, null);
+ private static final String SIMPLE_FORMATTING_PATH =
"/excel/simpleDataFormatting.xlsx";
- try (final InputStream inputStream =
getResourceStream("/excel/numbers.xlsx")) {
- final InferSchemaAccessStrategy<?> accessStrategy = new
InferSchemaAccessStrategy<>(
- (variables, content) -> new ExcelRecordSource(content,
context, variables, logger),
- new ExcelSchemaInference(timestampInference, locale),
logger);
- final RecordSchema schema = accessStrategy.getSchema(null,
inputStream, null);
- final List<String> fieldNames = schema.getFieldNames();
- assertEquals(Collections.singletonList(EXPECTED_FIRST_FIELD_NAME),
fieldNames);
-
- if (Locale.FRENCH.equals(locale)) {
- assertEquals(RecordFieldType.STRING,
schema.getDataType(EXPECTED_FIRST_FIELD_NAME).get().getFieldType());
- } else {
-
assertEquals(RecordFieldType.CHOICE.getChoiceDataType(RecordFieldType.FLOAT.getDataType(),
RecordFieldType.STRING.getDataType()),
- schema.getDataType(EXPECTED_FIRST_FIELD_NAME).get());
- }
- }
- }
+ @Mock
+ private ComponentLog logger;
- private static Stream<Arguments> getLocales() {
- Locale hindi = new Locale("hin");
- return Stream.of(
- Arguments.of(Locale.ENGLISH),
- Arguments.of(hindi),
- Arguments.of(Locale.JAPANESE),
- Arguments.of(Locale.FRENCH)
- );
- }
+ @Mock
+ private TimeValueInference timeValueInference;
+ // Infers a schema from the SIMPLE_FORMATTING_PATH workbook with every reader
+ // property left at its default value, then checks the four generated field
+ // names and that each field is a CHOICE of its typed value and STRING:
+ // LONG, TIMESTAMP, DOUBLE and BOOLEAN respectively.
@Test
public void testInferenceIncludesAllRecords() throws IOException {
final Map<PropertyDescriptor, String> properties = new HashMap<>();
new ExcelReader().getSupportedPropertyDescriptors().forEach(prop ->
properties.put(prop, prop.getDefaultValue()));
- final PropertyContext context = new
MockConfigurationContext(properties, null);
+ final PropertyContext context = new
MockConfigurationContext(properties, null, null);
final RecordSchema schema;
- try (final InputStream inputStream =
getResourceStream("/excel/simpleDataFormatting.xlsx")) {
+ try (final InputStream inputStream = getResourceStream()) {
final InferSchemaAccessStrategy<?> accessStrategy = new
InferSchemaAccessStrategy<>(
(variables, content) -> new ExcelRecordSource(content,
context, variables, logger),
- new ExcelSchemaInference(timestampInference),
Mockito.mock(ComponentLog.class));
+ new ExcelSchemaInference(timeValueInference), logger);
schema = accessStrategy.getSchema(null, inputStream, null);
}
- final List<String> fieldNames = schema.getFieldNames();
- assertEquals(Arrays.asList(EXPECTED_FIRST_FIELD_NAME,
EXPECTED_SECOND_FIELD_NAME,
- EXPECTED_THIRD_FIELD_NAME, EXPECTED_FOURTH_FIELD_NAME),
fieldNames);
-
assertEquals(RecordFieldType.CHOICE.getChoiceDataType(RecordFieldType.INT.getDataType(),
- RecordFieldType.STRING.getDataType()),
schema.getDataType(EXPECTED_FIRST_FIELD_NAME).get());
- assertEquals(RecordFieldType.CHOICE.getChoiceDataType(
- RecordFieldType.TIMESTAMP.getDataType("yyyy/MM/dd/
HH:mm"),
- RecordFieldType.DATE.getDataType("MM/dd/yyyy"),
+ assertFieldNamesFound(schema);
+ assertFieldDataTypeEquals(schema, EXPECTED_FIRST_FIELD_NAME,
+ RecordFieldType.CHOICE.getChoiceDataType(
+ RecordFieldType.LONG.getDataType(),
RecordFieldType.STRING.getDataType()
- ),
- schema.getDataType(EXPECTED_SECOND_FIELD_NAME).get());
- assertEquals(RecordFieldType.STRING,
schema.getDataType(EXPECTED_THIRD_FIELD_NAME).get().getFieldType());
-
assertEquals(RecordFieldType.CHOICE.getChoiceDataType(RecordFieldType.BOOLEAN.getDataType(),
- RecordFieldType.STRING.getDataType()),
schema.getDataType(EXPECTED_FOURTH_FIELD_NAME).get());
+ )
+ );
+ assertFieldDataTypeEquals(schema, EXPECTED_SECOND_FIELD_NAME,
+ RecordFieldType.CHOICE.getChoiceDataType(
+ // Assert Timestamp Data Type with standard Date and
Time Pattern
+ RecordFieldType.TIMESTAMP.getDataType(),
+ RecordFieldType.STRING.getDataType()
+ )
+ );
+ assertFieldDataTypeEquals(schema, EXPECTED_THIRD_FIELD_NAME,
+ RecordFieldType.CHOICE.getChoiceDataType(
+ RecordFieldType.DOUBLE.getDataType(),
+ RecordFieldType.STRING.getDataType()
+ )
+ );
+ assertFieldDataTypeEquals(schema, EXPECTED_FOURTH_FIELD_NAME,
+ RecordFieldType.CHOICE.getChoiceDataType(
+ RecordFieldType.BOOLEAN.getDataType(),
+ RecordFieldType.STRING.getDataType()
+ )
+ );
}
@Test
@@ -126,38 +109,87 @@ public class TestExcelSchemaInference {
new ExcelReader().getSupportedPropertyDescriptors().forEach(prop ->
properties.put(prop, prop.getDefaultValue()));
properties.put(ExcelReader.REQUIRED_SHEETS, "${required.sheets}");
properties.put(ExcelReader.STARTING_ROW, "${rows.to.skip}");
- final PropertyContext context = new
MockConfigurationContext(properties, null);
+ final PropertyContext context = new
MockConfigurationContext(properties, null, null);
final Map<String, String> attributes = new HashMap<>();
attributes.put("required.sheets", "Sheet1");
attributes.put("rows.to.skip", "2");
final RecordSchema schema;
- try (final InputStream inputStream =
getResourceStream("/excel/simpleDataFormatting.xlsx")) {
+ try (final InputStream inputStream = getResourceStream()) {
final InferSchemaAccessStrategy<?> accessStrategy = new
InferSchemaAccessStrategy<>(
(variables, content) -> new ExcelRecordSource(content,
context, variables, logger),
- new ExcelSchemaInference(timestampInference),
Mockito.mock(ComponentLog.class));
+ new ExcelSchemaInference(timeValueInference), logger);
schema = accessStrategy.getSchema(attributes, inputStream, null);
}
- final List<String> fieldNames = schema.getFieldNames();
- assertEquals(Arrays.asList(EXPECTED_FIRST_FIELD_NAME,
EXPECTED_SECOND_FIELD_NAME,
- EXPECTED_THIRD_FIELD_NAME, EXPECTED_FOURTH_FIELD_NAME),
fieldNames);
- assertEquals(RecordFieldType.INT.getDataType(),
schema.getDataType(EXPECTED_FIRST_FIELD_NAME).get());
- assertEquals(RecordFieldType.CHOICE.getChoiceDataType(
- RecordFieldType.TIMESTAMP.getDataType("yyyy/MM/dd/
HH:mm"),
- RecordFieldType.DATE.getDataType("MM/dd/yyyy"),
- RecordFieldType.STRING.getDataType()
- ),
- schema.getDataType(EXPECTED_SECOND_FIELD_NAME).get());
- assertEquals(RecordFieldType.STRING,
schema.getDataType(EXPECTED_THIRD_FIELD_NAME).get().getFieldType());
- assertEquals(RecordFieldType.BOOLEAN.getDataType(),
schema.getDataType(EXPECTED_FOURTH_FIELD_NAME).get());
+ assertFieldNamesFound(schema);
+
+ assertFieldDataTypeEquals(schema, EXPECTED_FIRST_FIELD_NAME,
RecordFieldType.LONG.getDataType());
+ assertFieldDataTypeEquals(schema, EXPECTED_SECOND_FIELD_NAME,
RecordFieldType.TIMESTAMP.getDataType());
+ assertFieldDataTypeEquals(schema, EXPECTED_THIRD_FIELD_NAME,
RecordFieldType.DOUBLE.getDataType());
+ assertFieldDataTypeEquals(schema, EXPECTED_FOURTH_FIELD_NAME,
RecordFieldType.BOOLEAN.getDataType());
+ }
+
+ // Builds a workbook in memory containing a single STRING cell with timestamp
+ // text and checks that schema inference, backed by a real (non-mocked)
+ // TimeValueInference, yields exactly one field of type TIMESTAMP.
+ @Test
+ public void testSchemaInferenceTimestampString() throws IOException {
+ final Map<PropertyDescriptor, String> properties = new HashMap<>();
+ new ExcelReader().getSupportedPropertyDescriptors().forEach(prop ->
properties.put(prop, prop.getDefaultValue()));
+ final PropertyContext context = new
MockConfigurationContext(properties, null, null);
+
+ final String timestampCellValue = "2020-01-01 12:30:45";
+
+ final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+ try (XSSFWorkbook workbook = new XSSFWorkbook()) {
+ final XSSFSheet sheet =
workbook.createSheet(TestExcelSchemaInference.class.getSimpleName());
+
+ // Only row index 1 is created; the cell sits in column 0 of that row
+ final XSSFRow row = sheet.createRow(1);
+ final XSSFCell cell = row.createCell(0, CellType.STRING);
+ cell.setCellValue(timestampCellValue);
+
+ workbook.write(outputStream);
+ }
+
+ // Date and time use the RecordFieldType default formats; the timestamp
+ // format is read from the default TIMESTAMP data type that is asserted below
+ final DataType timestampDataType =
RecordFieldType.TIMESTAMP.getDataType();
+ final String timestampFormat = timestampDataType.getFormat();
+ final TimeValueInference timestampValueInference = new
TimeValueInference(RecordFieldType.DATE.getDefaultFormat(),
RecordFieldType.TIME.getDefaultFormat(), timestampFormat);
+
+ final RecordSchema schema;
+ try (final InputStream inputStream = new
ByteArrayInputStream(outputStream.toByteArray())) {
+ final InferSchemaAccessStrategy<?> accessStrategy = new
InferSchemaAccessStrategy<>(
+ (variables, content) -> new ExcelRecordSource(content,
context, variables, logger),
+ new ExcelSchemaInference(timestampValueInference), logger);
+ schema = accessStrategy.getSchema(null, inputStream, null);
+ }
+
+ assertEquals(1, schema.getFieldCount());
+
+ final RecordField firstField = schema.getField(0);
+ assertEquals(RecordFieldType.TIMESTAMP.getDataType(),
firstField.getDataType());
}
- private InputStream getResourceStream(final String relativePath) {
- final InputStream resourceStream =
getClass().getResourceAsStream(relativePath);
+ // Opens the SIMPLE_FORMATTING_PATH classpath resource relative to this test
+ // class; throws IllegalStateException when the resource cannot be found.
+ // The caller is responsible for closing the returned stream.
+ private InputStream getResourceStream() {
+ final InputStream resourceStream =
getClass().getResourceAsStream(SIMPLE_FORMATTING_PATH);
if (resourceStream == null) {
- throw new IllegalStateException(String.format("Resource [%s] not
found", relativePath));
+ throw new IllegalStateException(String.format("Resource [%s] not
found", SIMPLE_FORMATTING_PATH));
}
return resourceStream;
}
+
+ // Asserts that the data type recorded in the schema for fieldName equals
+ // expectedDataType. A field with no data type is compared as null, so the
+ // failure is reported by assertEquals rather than by unwrapping the Optional.
+ private void assertFieldDataTypeEquals(final RecordSchema schema, final
String fieldName, final DataType expectedDataType) {
+ final DataType fieldDataType =
schema.getDataType(fieldName).orElse(null);
+ assertEquals(expectedDataType, fieldDataType);
+ }
+
+ // Asserts that the schema field names are exactly the four expected
+ // generated names, in first-to-fourth order.
+ private void assertFieldNamesFound(final RecordSchema schema) {
+ final List<String> fieldNames = schema.getFieldNames();
+ assertEquals(
+ Arrays.asList(
+ EXPECTED_FIRST_FIELD_NAME,
+ EXPECTED_SECOND_FIELD_NAME,
+ EXPECTED_THIRD_FIELD_NAME,
+ EXPECTED_FOURTH_FIELD_NAME
+ ),
+ fieldNames
+ );
+ }
}
diff --git
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestStandardCellFieldTypeReader.java
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestStandardCellFieldTypeReader.java
new file mode 100644
index 0000000000..746abcf8e9
--- /dev/null
+++
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestStandardCellFieldTypeReader.java
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.excel;
+
+import org.apache.nifi.schema.inference.FieldTypeInference;
+import org.apache.nifi.schema.inference.TimeValueInference;
+import org.apache.nifi.serialization.record.DataType;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.CellStyle;
+import org.apache.poi.ss.usermodel.CellType;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for StandardCellFieldTypeReader. The Cell, CellStyle and
+ * TimeValueInference collaborators are Mockito mocks, so each test stubs only
+ * the cell properties it needs and asserts the DataType returned by
+ * getCellDataType, or the map entry created by inferCellFieldType.
+ */
+@ExtendWith(MockitoExtension.class)
+class TestStandardCellFieldTypeReader {
+
+ // String cell value stubbed as recognized by the mocked TimeValueInference
+ private static final String TIMESTAMP_FORMATTED = "2020-01-01 12:00:00";
+
+ // Numeric value with a fractional part, expected to be read as DOUBLE
+ private static final double NUMERIC_DOUBLE = 123.45;
+
+ // Whole number, converted with doubleValue() and expected to be read as LONG
+ private static final Long NUMERIC_LONG = Long.MAX_VALUE;
+
+ // Date-formatted value with whole and fractional parts, expected as TIMESTAMP
+ // (presumably an Excel serial date-time - confirm)
+ private static final double NUMERIC_TIMESTAMP = 40909.417;
+
+ // Date-formatted value with a whole part only, expected as DATE
+ private static final double NUMERIC_DATE = 40909;
+
+ // Date-formatted value with a fractional part only, expected as TIME
+ private static final double NUMERIC_TIME = 0.417;
+
+ // Data format index returned by the mocked CellStyle so the cell is treated
+ // as date formatted (index 14 is presumably the built-in m/d/yy format in
+ // POI BuiltinFormats - confirm)
+ private static final short EXCEL_INTERNAL_DATE_FORMAT = 14;
+
+ private static final String FIELD_NAME = "FirstField";
+
+ @Mock
+ private Cell cell;
+
+ @Mock
+ private CellStyle cellStyle;
+
+ @Mock
+ private TimeValueInference timeValueInference;
+
+ private StandardCellFieldTypeReader reader;
+
+ /** Creates a new reader around the mocked TimeValueInference before each test. */
+ @BeforeEach
+ void setReader() {
+ reader = new StandardCellFieldTypeReader(timeValueInference);
+ }
+
+ /** A null cell yields a null data type. */
+ @Test
+ void testGetCellDataTypeNullCell() {
+ final DataType dataType = reader.getCellDataType(null);
+
+ assertNull(dataType);
+ }
+
+ /** A BLANK cell yields a null data type. */
+ @Test
+ void testGetCellDataTypeBlank() {
+ when(cell.getCellType()).thenReturn(CellType.BLANK);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertNull(dataType);
+ }
+
+ /** An ERROR cell yields a null data type. */
+ @Test
+ void testGetCellDataTypeError() {
+ when(cell.getCellType()).thenReturn(CellType.ERROR);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertNull(dataType);
+ }
+
+ /** A FORMULA cell with a cached BOOLEAN result yields BOOLEAN. */
+ @Test
+ void testGetCellDataTypeFormulaBoolean() {
+ when(cell.getCellType()).thenReturn(CellType.FORMULA);
+ when(cell.getCachedFormulaResultType()).thenReturn(CellType.BOOLEAN);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(RecordFieldType.BOOLEAN.getDataType(), dataType);
+ }
+
+ /** A FORMULA cell with a cached STRING result yields STRING. */
+ @Test
+ void testGetCellDataTypeFormulaString() {
+ when(cell.getCellType()).thenReturn(CellType.FORMULA);
+ when(cell.getCachedFormulaResultType()).thenReturn(CellType.STRING);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(RecordFieldType.STRING.getDataType(), dataType);
+ }
+
+ /**
+ * A FORMULA cell with a cached NUMERIC result yields DOUBLE when neither a
+ * numeric value nor a cell style is stubbed on the mock.
+ */
+ @Test
+ void testGetCellDataTypeFormulaNumericDouble() {
+ when(cell.getCellType()).thenReturn(CellType.FORMULA);
+ when(cell.getCachedFormulaResultType()).thenReturn(CellType.NUMERIC);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(RecordFieldType.DOUBLE.getDataType(), dataType);
+ }
+
+ /** A date-formatted formula result with a whole value yields DATE. */
+ @Test
+ void testGetCellDataTypeFormulaNumericDate() {
+ assertFormulaNumericDateTimeDataTypeFound(NUMERIC_DATE,
RecordFieldType.DATE.getDataType());
+ }
+
+ /** A date-formatted formula result with only a fraction yields TIME. */
+ @Test
+ void testGetCellDataTypeFormulaNumericTime() {
+ assertFormulaNumericDateTimeDataTypeFound(NUMERIC_TIME,
RecordFieldType.TIME.getDataType());
+ }
+
+ /** A date-formatted formula result with whole and fraction yields TIMESTAMP. */
+ @Test
+ void testGetCellDataTypeFormulaNumericTimestamp() {
+ assertFormulaNumericDateTimeDataTypeFound(NUMERIC_TIMESTAMP,
RecordFieldType.TIMESTAMP.getDataType());
+ }
+
+ /** A FORMULA cell with a cached ERROR result yields a null data type. */
+ @Test
+ void testGetCellDataTypeFormulaError() {
+ when(cell.getCellType()).thenReturn(CellType.FORMULA);
+ when(cell.getCachedFormulaResultType()).thenReturn(CellType.ERROR);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertNull(dataType);
+ }
+
+ /** A BOOLEAN cell yields BOOLEAN. */
+ @Test
+ void testGetCellDataTypeBoolean() {
+ when(cell.getCellType()).thenReturn(CellType.BOOLEAN);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(RecordFieldType.BOOLEAN.getDataType(), dataType);
+ }
+
+ /** A STRING cell with no stubbed value or time inference yields STRING. */
+ @Test
+ void testGetCellDataTypeString() {
+ when(cell.getCellType()).thenReturn(CellType.STRING);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(RecordFieldType.STRING.getDataType(), dataType);
+ }
+
+ /**
+ * A STRING cell whose value the TimeValueInference maps to a data type
+ * yields that data type (TIMESTAMP here) instead of STRING.
+ */
+ @Test
+ void testGetCellDataTypeStringTimestamp() {
+ final DataType timestampDataType =
RecordFieldType.TIMESTAMP.getDataType();
+
+ when(cell.getCellType()).thenReturn(CellType.STRING);
+ when(cell.getStringCellValue()).thenReturn(TIMESTAMP_FORMATTED);
+
when(timeValueInference.getDataType(eq(TIMESTAMP_FORMATTED))).thenReturn(Optional.of(timestampDataType));
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(timestampDataType, dataType);
+ }
+
+ /** A NUMERIC cell holding a value with a fractional part yields DOUBLE. */
+ @Test
+ void testGetCellDataTypeNumericDouble() {
+ when(cell.getCellType()).thenReturn(CellType.NUMERIC);
+ when(cell.getNumericCellValue()).thenReturn(NUMERIC_DOUBLE);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(RecordFieldType.DOUBLE.getDataType(), dataType);
+ }
+
+ /** A NUMERIC cell holding Long.MAX_VALUE converted to double yields LONG. */
+ @Test
+ void testGetCellDataTypeNumericLong() {
+ when(cell.getCellType()).thenReturn(CellType.NUMERIC);
+
when(cell.getNumericCellValue()).thenReturn(NUMERIC_LONG.doubleValue());
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(RecordFieldType.LONG.getDataType(), dataType);
+ }
+
+ /** A date-formatted NUMERIC cell with a whole value yields DATE. */
+ @Test
+ void testGetCellDataTypeNumericDate() {
+ assertNumericDateTimeDataTypeFound(NUMERIC_DATE,
RecordFieldType.DATE.getDataType());
+ }
+
+ /** A date-formatted NUMERIC cell with only a fraction yields TIME. */
+ @Test
+ void testGetCellDataTypeNumericTime() {
+ assertNumericDateTimeDataTypeFound(NUMERIC_TIME,
RecordFieldType.TIME.getDataType());
+ }
+
+ /** A date-formatted NUMERIC cell with whole and fraction yields TIMESTAMP. */
+ @Test
+ void testGetCellDataTypeNumericTimestamp() {
+ assertNumericDateTimeDataTypeFound(NUMERIC_TIMESTAMP,
RecordFieldType.TIMESTAMP.getDataType());
+ }
+
+ /**
+ * Calling inferCellFieldType with an unstubbed cell mock still registers a
+ * FieldTypeInference under the field name in the supplied map.
+ */
+ @Test
+ void testInferCellFieldType() {
+ final Map<String, FieldTypeInference> fieldTypes = new HashMap<>();
+
+ reader.inferCellFieldType(cell, FIELD_NAME, fieldTypes);
+
+ final FieldTypeInference fieldTypeInference =
fieldTypes.get(FIELD_NAME);
+ assertNotNull(fieldTypeInference);
+ }
+
+ /**
+ * Stubs the cell as NUMERIC with the given value and a cell style using the
+ * internal date format, then asserts the data type returned by the reader.
+ */
+ private void assertNumericDateTimeDataTypeFound(final double
numericCellValue, final DataType expectedDataType) {
+ when(cell.getCellType()).thenReturn(CellType.NUMERIC);
+ when(cell.getNumericCellValue()).thenReturn(numericCellValue);
+ when(cell.getCellStyle()).thenReturn(cellStyle);
+ // Set Data Format to internal Date Format for Data Type detection in
DateUtil.isCellDateFormatted
+ when(cellStyle.getDataFormat()).thenReturn(EXCEL_INTERNAL_DATE_FORMAT);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(expectedDataType, dataType);
+ }
+
+ /**
+ * Same as assertNumericDateTimeDataTypeFound, but the cell is stubbed as a
+ * FORMULA whose cached result type is NUMERIC.
+ */
+ private void assertFormulaNumericDateTimeDataTypeFound(final double
numericCellValue, final DataType expectedDataType) {
+ when(cell.getCellType()).thenReturn(CellType.FORMULA);
+ when(cell.getCachedFormulaResultType()).thenReturn(CellType.NUMERIC);
+ when(cell.getNumericCellValue()).thenReturn(numericCellValue);
+ when(cell.getCellStyle()).thenReturn(cellStyle);
+ // Set Data Format to internal Date Format for Data Type detection in
DateUtil.isCellDateFormatted
+ when(cellStyle.getDataFormat()).thenReturn(EXCEL_INTERNAL_DATE_FORMAT);
+
+ final DataType dataType = reader.getCellDataType(cell);
+
+ assertEquals(expectedDataType, dataType);
+ }
+}