This is an automated email from the ASF dual-hosted git repository.
exceptionfactory pushed a commit to branch support/nifi-1.x
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/support/nifi-1.x by this push:
new e399750abe NIFI-13819 Set Row Number and Sheet Name for ExcelReader Exceptions (#9334)
e399750abe is described below
commit e399750abeef7b8099d63c49ad326f859069dfee
Author: dan-s1 <[email protected]>
AuthorDate: Tue Oct 8 15:51:23 2024 -0400
NIFI-13819 Set Row Number and Sheet Name for ExcelReader Exceptions (#9334)
Signed-off-by: David Handermann <[email protected]>
---
.../org/apache/nifi/excel/ExcelRecordReader.java | 10 ++++--
.../apache/nifi/excel/TestExcelRecordReader.java | 38 +++++++++++++++++-----
2 files changed, 37 insertions(+), 11 deletions(-)
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
index b0c7a918bc..c18a31d1b2 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
@@ -86,14 +86,20 @@ public class ExcelRecordReader implements RecordReader {
@Override
public Record nextRecord(boolean coerceTypes, boolean dropUnknownFields)
throws MalformedRecordException {
+ Row currentRow = null;
try {
if (rowIterator.hasNext()) {
- Row currentRow = rowIterator.next();
+ currentRow = rowIterator.next();
Map<String, Object> currentRowValues =
getCurrentRowValues(currentRow, coerceTypes, dropUnknownFields);
return new MapRecord(schema, currentRowValues);
}
} catch (Exception e) {
- throw new MalformedRecordException("Read next Record from Excel XLSX failed", e);
+ String exceptionMessage = "Read next Record from Excel XLSX failed";
+ if (currentRow != null) {
+ exceptionMessage = String.format("%s on row %s in sheet %s",
+ exceptionMessage, currentRow.getRowNum(), currentRow.getSheet().getSheetName());
+ }
+ throw new MalformedRecordException(exceptionMessage, e);
}
return null;
}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
index 6f5a0afeed..e088d5761d 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
@@ -118,7 +118,7 @@ public class TestExcelRecordReader {
}
@Test
- public void testNonExcelFile() {
+ void testNonExcelFile() {
ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder()
.build();
@@ -128,14 +128,14 @@ public class TestExcelRecordReader {
}
@Test
- public void testOlderExcelFormatFile() {
+ void testOlderExcelFormatFile() {
ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder().build();
MalformedRecordException mre = assertThrows(MalformedRecordException.class, () -> new ExcelRecordReader(configuration, getInputStream("olderFormat.xls"), logger));
assertTrue(ExceptionUtils.getStackTrace(mre).contains("data appears to be in the OLE2 Format"));
}
@Test
- public void testMultipleRecordsSingleSheet() throws
MalformedRecordException {
+ void testMultipleRecordsSingleSheet() throws MalformedRecordException {
ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder()
.withSchema(getDataFormattingSchema())
.build();
@@ -177,7 +177,7 @@ public class TestExcelRecordReader {
@ParameterizedTest
@ValueSource(booleans = {true, false})
- public void testDropUnknownFields(boolean dropUnknownFields) throws MalformedRecordException {
+ void testDropUnknownFields(boolean dropUnknownFields) throws MalformedRecordException {
final List<RecordField> fields = Arrays.asList(
new RecordField("Numbers",
RecordFieldType.DOUBLE.getDataType()),
new RecordField("Timestamps",
RecordFieldType.DATE.getDataType()));
@@ -201,7 +201,7 @@ public class TestExcelRecordReader {
}
@Test
- public void testSkipLines() throws MalformedRecordException {
+ void testSkipLines() throws MalformedRecordException {
ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder()
.withFirstRow(5)
.withSchema(getDataFormattingSchema())
@@ -215,7 +215,7 @@ public class TestExcelRecordReader {
@ParameterizedTest
@ValueSource(booleans = {true, false})
- public void tesCoerceTypes(boolean coerceTypes) throws
MalformedRecordException {
+ void tesCoerceTypes(boolean coerceTypes) throws MalformedRecordException {
String fieldName = "dates";
RecordSchema schema = new
SimpleRecordSchema(Collections.singletonList(new RecordField(fieldName,
RecordFieldType.TIMESTAMP.getDataType())));
ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder()
@@ -233,7 +233,7 @@ public class TestExcelRecordReader {
}
@Test
- public void testSelectSpecificSheet() throws MalformedRecordException {
+ void testSelectSpecificSheet() throws MalformedRecordException {
RecordSchema schema = getSpecificSheetSchema();
List<String> requiredSheets = Collections.singletonList("TestSheetA");
ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder()
@@ -255,7 +255,7 @@ public class TestExcelRecordReader {
}
@Test
- public void testSelectSpecificSheetNotFound() {
+ void testSelectSpecificSheetNotFound() {
RecordSchema schema = getSpecificSheetSchema();
List<String> requiredSheets =
Collections.singletonList("notExistingSheet");
ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder()
@@ -271,7 +271,7 @@ public class TestExcelRecordReader {
}
@Test
- public void testSelectAllSheets() throws MalformedRecordException {
+ void testSelectAllSheets() throws MalformedRecordException {
RecordSchema schema = new SimpleRecordSchema(Arrays.asList(new
RecordField("first", RecordFieldType.STRING.getDataType()),
new RecordField("second",
RecordFieldType.STRING.getDataType())));
ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder()
@@ -284,6 +284,26 @@ public class TestExcelRecordReader {
assertEquals(7, records.size());
}
+ @Test
+ void testWhereCellValueDoesNotMatchSchemaType() {
+ RecordSchema schema = new SimpleRecordSchema(Arrays.asList(new
RecordField("first", RecordFieldType.STRING.getDataType()),
+ new RecordField("second",
RecordFieldType.FLOAT.getDataType())));
+ List<String> requiredSheets = Collections.singletonList("TestSheetA");
+ ExcelRecordReaderConfiguration configuration = new
ExcelRecordReaderConfiguration.Builder()
+ .withSchema(schema)
+ .withFirstRow(2)
+ .withRequiredSheets(requiredSheets)
+ .build();
+
+ final MalformedRecordException mre =
assertThrows(MalformedRecordException.class, () -> {
+ ExcelRecordReader recordReader = new
ExcelRecordReader(configuration, getInputStream(MULTI_SHEET_FILE), logger);
+ getRecords(recordReader, true, false);
+ });
+
+ assertInstanceOf(NumberFormatException.class, mre.getCause());
+ assertTrue(mre.getMessage().contains("on row") && mre.getMessage().contains("in sheet"));
+ }
+
@Test
void testPasswordProtected() throws Exception {
RecordSchema schema = getPasswordProtectedSchema();