This is an automated email from the ASF dual-hosted git repository.

exceptionfactory pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/main by this push:
     new 27522cf9e7 NIFI-14426 Added support for Binary XLS format to 
ExcelReader (#9874)
27522cf9e7 is described below

commit 27522cf9e74659d1a752a9b9722911f1c8c801dd
Author: zhtk <[email protected]>
AuthorDate: Sat May 10 18:40:53 2025 +0200

    NIFI-14426 Added support for Binary XLS format to ExcelReader (#9874)
    
    Signed-off-by: David Handermann <[email protected]>
    Co-authored-by: dan-s1 <[email protected]>
---
 .../nifi/excel/ExcelHeaderSchemaStrategy.java      |   2 +
 .../java/org/apache/nifi/excel/ExcelReader.java    |  17 ++-
 .../org/apache/nifi/excel/ExcelRecordReader.java   |   2 +-
 .../nifi/excel/ExcelRecordReaderConfiguration.java |  12 ++
 .../org/apache/nifi/excel/ExcelRecordSource.java   |   2 +
 .../java/org/apache/nifi/excel/InputFileType.java  |  45 +++++++
 .../java/org/apache/nifi/excel/RowIterator.java    |  29 ++++-
 .../apache/nifi/excel/TestExcelRecordReader.java   | 142 ++++++++++++++-------
 8 files changed, 191 insertions(+), 60 deletions(-)

diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java
index 5db05490d2..fc8073adf7 100644
--- 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelHeaderSchemaStrategy.java
@@ -74,10 +74,12 @@ public class ExcelHeaderSchemaStrategy implements 
SchemaAccessStrategy {
         final int firstRow = rawFirstRow == null ? 
NumberUtils.toInt(ExcelReader.STARTING_ROW.getDefaultValue()) : rawFirstRow;
         final int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(firstRow);
         final String password = 
context.getProperty(ExcelReader.PASSWORD).getValue();
+        final InputFileType inputFileType = 
context.getProperty(ExcelReader.INPUT_FILE_TYPE).asAllowableValue(InputFileType.class);
         final ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withRequiredSheets(requiredSheets)
                 .withFirstRow(zeroBasedFirstRow)
                 .withPassword(password)
+                .withInputFileType(inputFileType)
                 .build();
 
         final RowIterator rowIterator = new RowIterator(contentStream, 
configuration, logger);
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
index 428721e745..d36c7f7923 100644
--- 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
@@ -53,13 +53,13 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
-@Tags({"excel", "spreadsheet", "xlsx", "parse", "record", "row", "reader", 
"values", "cell"})
+@Tags({"excel", "spreadsheet", "xls", "xlsx", "parse", "record", "row", 
"reader", "values", "cell"})
 @CapabilityDescription("Parses a Microsoft Excel document returning each row 
in each sheet as a separate record. "
         + "This reader allows for inferring a schema from all the required 
sheets "
         + "or providing an explicit schema for interpreting the values."
         + "See Controller Service's Usage for further documentation. "
-        + "This reader is currently only capable of processing .xlsx "
-        + "(XSSF 2007 OOXML file format) Excel documents and not older .xls 
(HSSF '97(-2007) file format) documents.")
+        + "This reader is capable of processing both password and non password 
protected .xlsx (XSSF 2007 OOXML file format) "
+        + "and older .xls (HSSF '97(-2007) file format) Excel documents.")
 public class ExcelReader extends SchemaRegistryService implements 
RecordReaderFactory {
 
     public static final PropertyDescriptor REQUIRED_SHEETS = new 
PropertyDescriptor
@@ -85,6 +85,15 @@ public class ExcelReader extends SchemaRegistryService 
implements RecordReaderFa
             .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
             .build();
 
+    public static final PropertyDescriptor INPUT_FILE_TYPE = new 
PropertyDescriptor
+            .Builder().name("Input File Type")
+            .displayName("Input File Type")
+            .description("Specifies type of Excel input file.")
+            .required(true)
+            .allowableValues(InputFileType.class)
+            .defaultValue(InputFileType.XLSX)
+            .build();
+
     public static final PropertyDescriptor PROTECTION_TYPE = new 
PropertyDescriptor
             .Builder().name("Protection Type")
             .displayName("Protection Type")
@@ -128,6 +137,7 @@ public class ExcelReader extends SchemaRegistryService 
implements RecordReaderFa
         final List<String> requiredSheets = getRequiredSheets(variables);
         final int firstRow = getStartingRow(variables);
         final String password = 
configurationContext.getProperty(PASSWORD).getValue();
+        final InputFileType inputFileType = 
configurationContext.getProperty(INPUT_FILE_TYPE).asAllowableValue(InputFileType.class);
         final ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withDateFormat(dateFormat)
                 .withRequiredSheets(requiredSheets)
@@ -136,6 +146,7 @@ public class ExcelReader extends SchemaRegistryService 
implements RecordReaderFa
                 .withTimeFormat(timeFormat)
                 .withTimestampFormat(timestampFormat)
                 .withPassword(password)
+                .withInputFileType(inputFileType)
                 .build();
 
         return new ExcelRecordReader(configuration, in, logger);
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
index ea45a2adca..d055a1f251 100644
--- 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
@@ -71,7 +71,7 @@ public class ExcelRecordReader implements RecordReader {
         try {
             this.rowIterator = new RowIterator(inputStream, configuration, 
logger);
         } catch (RuntimeException e) {
-            throw new MalformedRecordException("Read initial Record from Excel 
XLSX failed", e);
+            throw new MalformedRecordException("Read initial Record from Excel 
file failed", e);
         }
     }
 
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
index 6faa826ad5..f3fd777ec1 100644
--- 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
@@ -30,6 +30,7 @@ public class ExcelRecordReaderConfiguration {
     private String timestampFormat;
     private String password;
     private boolean avoidTempFiles;
+    private InputFileType inputFileType;
 
     private ExcelRecordReaderConfiguration() {
     }
@@ -66,6 +67,10 @@ public class ExcelRecordReaderConfiguration {
         return avoidTempFiles;
     }
 
+    public InputFileType getInputFileType() {
+        return inputFileType;
+    }
+
     public static final class Builder {
         private RecordSchema schema;
         private List<String> requiredSheets;
@@ -75,6 +80,7 @@ public class ExcelRecordReaderConfiguration {
         private String timestampFormat;
         private String password;
         private boolean avoidTempFiles;
+        private InputFileType inputFileType;
 
         public Builder withSchema(RecordSchema schema) {
             this.schema = schema;
@@ -116,6 +122,11 @@ public class ExcelRecordReaderConfiguration {
             return this;
         }
 
+        public Builder withInputFileType(InputFileType inputFileType) {
+            this.inputFileType = inputFileType;
+            return this;
+        }
+
         public ExcelRecordReaderConfiguration build() {
             ExcelRecordReaderConfiguration excelRecordReaderConfiguration = 
new ExcelRecordReaderConfiguration();
             excelRecordReaderConfiguration.schema = this.schema;
@@ -126,6 +137,7 @@ public class ExcelRecordReaderConfiguration {
             excelRecordReaderConfiguration.firstRow = this.firstRow;
             excelRecordReaderConfiguration.password = password;
             excelRecordReaderConfiguration.avoidTempFiles = avoidTempFiles;
+            excelRecordReaderConfiguration.inputFileType = inputFileType;
 
             return excelRecordReaderConfiguration;
         }
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
index 24f4a9fcff..0c98f6e0a0 100644
--- 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
@@ -36,10 +36,12 @@ public class ExcelRecordSource implements RecordSource<Row> 
{
         final int firstRow = rawFirstRow == null ? 
NumberUtils.toInt(ExcelReader.STARTING_ROW.getDefaultValue()) : rawFirstRow;
         final int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(firstRow);
         final String password = 
context.getProperty(ExcelReader.PASSWORD).getValue();
+        final InputFileType inputFileType = 
context.getProperty(ExcelReader.INPUT_FILE_TYPE).asAllowableValue(InputFileType.class);
         final ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withRequiredSheets(requiredSheets)
                 .withFirstRow(zeroBasedFirstRow)
                 .withPassword(password)
+                .withInputFileType(inputFileType)
                 .build();
         this.rowIterator = new RowIterator(in, configuration, logger);
     }
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/InputFileType.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/InputFileType.java
new file mode 100644
index 0000000000..9f5d4df000
--- /dev/null
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/InputFileType.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.excel;
+
+import org.apache.nifi.components.DescribedValue;
+
+public enum InputFileType implements DescribedValue {
+    XLS("XLS 1997-2007 file format"),
+    XLSX("XLSX 2007-present OOXML file format");
+
+    InputFileType(String description) {
+        this.description = description;
+    }
+
+    private final String description;
+
+    @Override
+    public String getValue() {
+        return name();
+    }
+
+    @Override
+    public String getDisplayName() {
+        return name();
+    }
+
+    @Override
+    public String getDescription() {
+        return description;
+    }
+}
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
index 6d067a7c07..d4172be8ad 100644
--- 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
@@ -19,6 +19,8 @@ package org.apache.nifi.excel;
 import com.github.pjfanning.xlsx.StreamingReader;
 import org.apache.nifi.logging.ComponentLog;
 import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
 import org.apache.poi.ss.usermodel.Workbook;
@@ -42,13 +44,26 @@ class RowIterator implements Iterator<Row>, Closeable {
     private Row currentRow;
 
     RowIterator(final InputStream in, final ExcelRecordReaderConfiguration 
configuration, final ComponentLog logger) {
-        this.workbook = StreamingReader.builder()
-                .rowCacheSize(100)
-                .bufferSize(4096)
-                .password(configuration.getPassword())
-                .setAvoidTempFiles(configuration.isAvoidTempFiles())
-                .setReadSharedFormulas(true) // NOTE: If not set to true, then 
data with shared formulas fail.
-                .open(in);
+        if (configuration.getInputFileType() == InputFileType.XLSX) {
+            this.workbook = StreamingReader.builder()
+                    .rowCacheSize(100)
+                    .bufferSize(4096)
+                    .password(configuration.getPassword())
+                    .setAvoidTempFiles(configuration.isAvoidTempFiles())
+                    .setReadSharedFormulas(true) // NOTE: If not set to true, 
then data with shared formulas fail.
+                    .open(in);
+        } else {
+            // Providing the password to the HSSFWorkbook is done by setting a 
thread variable managed by
+            // Biff8EncryptionKey. After the workbook is created, the thread 
variable can be cleared.
+            
Biff8EncryptionKey.setCurrentUserPassword(configuration.getPassword());
+            try {
+                this.workbook = new HSSFWorkbook(in);
+            } catch (final IOException e) {
+                throw new ProcessException("Failed to open XLS file", e);
+            } finally {
+                Biff8EncryptionKey.setCurrentUserPassword(null);
+            }
+        }
 
         final List<String> requiredSheets = configuration.getRequiredSheets();
         if (requiredSheets == null || requiredSheets.isEmpty()) {
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
index 14f7015a15..4b33c2e238 100644
--- 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
@@ -16,7 +16,6 @@
  */
 package org.apache.nifi.excel;
 
-import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.nifi.logging.ComponentLog;
 import org.apache.nifi.processor.exception.ProcessException;
 import org.apache.nifi.serialization.MalformedRecordException;
@@ -25,7 +24,8 @@ import org.apache.nifi.serialization.record.Record;
 import org.apache.nifi.serialization.record.RecordField;
 import org.apache.nifi.serialization.record.RecordFieldType;
 import org.apache.nifi.serialization.record.RecordSchema;
-import com.github.pjfanning.xlsx.exceptions.ReadException;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.poifs.crypt.EncryptionInfo;
 import org.apache.poi.poifs.crypt.EncryptionMode;
@@ -33,13 +33,15 @@ import org.apache.poi.poifs.crypt.Encryptor;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.Row;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EnumSource;
 import org.junit.jupiter.params.provider.ValueSource;
 import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;
@@ -53,9 +55,9 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.sql.Timestamp;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.function.Supplier;
 
 import static java.nio.file.Files.newDirectoryStream;
 import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
@@ -71,6 +73,7 @@ public class TestExcelRecordReader {
     private static final String MULTI_SHEET_FILE = "twoSheets.xlsx";
     private static final String PASSWORD = "nifi";
     private static final ByteArrayOutputStream PASSWORD_PROTECTED = new 
ByteArrayOutputStream();
+    private static final ByteArrayOutputStream PASSWORD_PROTECTED_OLDER_EXCEL 
= new ByteArrayOutputStream();
     private static final Object[][] DATA = {
             {"ID", "Name"},
             {1, "Manny"},
@@ -98,21 +101,8 @@ public class TestExcelRecordReader {
 
     @BeforeAll
     static void setUpBeforeAll() throws Exception {
-        //Generate an Excel file and populate it with data
-        final InputStream workbook = createWorkbook(DATA);
-
-        //Protect the Excel file with a password
-        try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) {
-            EncryptionInfo encryptionInfo = new 
EncryptionInfo(EncryptionMode.agile);
-            Encryptor encryptor = encryptionInfo.getEncryptor();
-            encryptor.confirmPassword(PASSWORD);
-
-            try (OPCPackage opc = OPCPackage.open(workbook);
-                 OutputStream os = encryptor.getDataStream(poifsFileSystem)) {
-                opc.save(os);
-            }
-            poifsFileSystem.writeFilesystem(PASSWORD_PROTECTED);
-        }
+        createPasswordProtectedWorkbook();
+        createPasswordProtectedOlderExcelWorkbook();
     }
 
     @Test
@@ -122,20 +112,32 @@ public class TestExcelRecordReader {
 
         MalformedRecordException mre = 
assertThrows(MalformedRecordException.class, () -> new 
ExcelRecordReader(configuration, getInputStream("notExcel.txt"), logger));
         final Throwable cause = mre.getCause();
-        assertInstanceOf(ReadException.class, cause);
+        assertInstanceOf(ProcessException.class, cause);
     }
 
     @Test
-    public void testOlderExcelFormatFile() {
-        ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder().build();
-        MalformedRecordException mre = 
assertThrows(MalformedRecordException.class, () -> new 
ExcelRecordReader(configuration, getInputStream("olderFormat.xls"), logger));
-        assertTrue(ExceptionUtils.getStackTrace(mre).contains("data appears to 
be in the OLE2 Format"));
+    public void testOlderExcelFormatFile() throws MalformedRecordException {
+        final List<RecordField> fields = List.of(
+                new RecordField("A", RecordFieldType.STRING.getDataType()),
+                new RecordField("B", RecordFieldType.STRING.getDataType()),
+                new RecordField("C", RecordFieldType.STRING.getDataType()));
+
+        ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
+                .withSchema(new SimpleRecordSchema(fields))
+                .withInputFileType(InputFileType.XLS)
+                .build();
+
+        final ExcelRecordReader recordReader = new 
ExcelRecordReader(configuration, getInputStream("olderFormat.xls"), logger);
+        final List<Record> records = getRecords(recordReader, false, false);
+
+        assertEquals(4, records.size());
     }
 
     @Test
     public void testMultipleRecordsSingleSheet() throws 
MalformedRecordException {
         ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withSchema(getDataFormattingSchema())
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         ExcelRecordReader recordReader = new ExcelRecordReader(configuration, 
getInputStream(DATA_FORMATTING_FILE), logger);
@@ -145,7 +147,7 @@ public class TestExcelRecordReader {
     }
 
     private RecordSchema getDataFormattingSchema() {
-        final List<RecordField> fields = Arrays.asList(
+        final List<RecordField> fields = List.of(
                 new RecordField("Numbers", 
RecordFieldType.DOUBLE.getDataType()),
                 new RecordField("Timestamps", 
RecordFieldType.DATE.getDataType()),
                 new RecordField("Money", RecordFieldType.DOUBLE.getDataType()),
@@ -176,12 +178,13 @@ public class TestExcelRecordReader {
     @ParameterizedTest
     @ValueSource(booleans = {true, false})
     public void testDropUnknownFields(boolean dropUnknownFields) throws 
MalformedRecordException {
-        final List<RecordField> fields = Arrays.asList(
+        final List<RecordField> fields = List.of(
                 new RecordField("Numbers", 
RecordFieldType.DOUBLE.getDataType()),
                 new RecordField("Timestamps", 
RecordFieldType.DATE.getDataType()));
 
         ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withSchema(new SimpleRecordSchema(fields))
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         ExcelRecordReader recordReader = new ExcelRecordReader(configuration, 
getInputStream(DATA_FORMATTING_FILE), logger);
@@ -203,6 +206,7 @@ public class TestExcelRecordReader {
         ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withFirstRow(5)
                 .withSchema(getDataFormattingSchema())
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         ExcelRecordReader recordReader = new ExcelRecordReader(configuration, 
getInputStream(DATA_FORMATTING_FILE), logger);
@@ -221,6 +225,7 @@ public class TestExcelRecordReader {
                 .withTimeFormat(RecordFieldType.TIME.getDefaultFormat())
                 
.withTimestampFormat(RecordFieldType.TIMESTAMP.getDefaultFormat())
                 .withSchema(schema)
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         ExcelRecordReader recordReader = new ExcelRecordReader(configuration, 
getInputStream("dates.xlsx"), logger);
@@ -238,6 +243,7 @@ public class TestExcelRecordReader {
                 .withSchema(schema)
                 .withFirstRow(1)
                 .withRequiredSheets(requiredSheets)
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         ExcelRecordReader recordReader = new ExcelRecordReader(configuration, 
getInputStream(MULTI_SHEET_FILE), logger);
@@ -247,7 +253,7 @@ public class TestExcelRecordReader {
     }
 
     private RecordSchema getSpecificSheetSchema() {
-        return  new SimpleRecordSchema(Arrays.asList(new RecordField("first", 
RecordFieldType.STRING.getDataType()),
+        return  new SimpleRecordSchema(List.of(new RecordField("first", 
RecordFieldType.STRING.getDataType()),
                 new RecordField("second", 
RecordFieldType.STRING.getDataType()),
                 new RecordField("third", 
RecordFieldType.STRING.getDataType())));
     }
@@ -260,6 +266,7 @@ public class TestExcelRecordReader {
                 .withSchema(schema)
                 .withFirstRow(1)
                 .withRequiredSheets(requiredSheets)
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         MalformedRecordException mre = 
assertThrows(MalformedRecordException.class,
@@ -270,10 +277,11 @@ public class TestExcelRecordReader {
 
     @Test
     public void testSelectAllSheets() throws MalformedRecordException {
-        RecordSchema schema = new SimpleRecordSchema(Arrays.asList(new 
RecordField("first", RecordFieldType.STRING.getDataType()),
+        RecordSchema schema = new SimpleRecordSchema(List.of(new 
RecordField("first", RecordFieldType.STRING.getDataType()),
                 new RecordField("second", 
RecordFieldType.STRING.getDataType())));
         ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withSchema(schema)
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         ExcelRecordReader recordReader = new ExcelRecordReader(configuration, 
getInputStream(MULTI_SHEET_FILE), logger);
@@ -284,13 +292,14 @@ public class TestExcelRecordReader {
 
     @Test
     void testWhereCellValueDoesNotMatchSchemaType()  {
-        RecordSchema schema = new SimpleRecordSchema(Arrays.asList(new 
RecordField("first", RecordFieldType.STRING.getDataType()),
+        RecordSchema schema = new SimpleRecordSchema(List.of(new 
RecordField("first", RecordFieldType.STRING.getDataType()),
                 new RecordField("second", 
RecordFieldType.FLOAT.getDataType())));
         List<String> requiredSheets = Collections.singletonList("TestSheetA");
         ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withSchema(schema)
                 .withFirstRow(2)
                 .withRequiredSheets(requiredSheets)
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         final MalformedRecordException mre = 
assertThrows(MalformedRecordException.class, () ->  {
@@ -302,48 +311,55 @@ public class TestExcelRecordReader {
         assertTrue(mre.getMessage().contains("on row") && 
mre.getMessage().contains("in sheet"));
     }
 
-    @Test
-    void testPasswordProtected() throws Exception {
-        RecordSchema schema = getPasswordProtectedSchema();
-        ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
+    @ParameterizedTest
+    @EnumSource(InputFileType.class)
+    void testPasswordProtected(InputFileType inputFileType) throws Exception {
+        final RecordSchema schema = getPasswordProtectedSchema();
+        final ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withSchema(schema)
                 .withPassword(PASSWORD)
                 .withAvoidTempFiles(true)
+                .withInputFileType(inputFileType)
                 .build();
 
-        InputStream inputStream = new 
ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray());
-        ExcelRecordReader recordReader = new ExcelRecordReader(configuration, 
inputStream, logger);
-        List<Record> records = getRecords(recordReader, false, false);
+        final ByteArrayOutputStream fileStream = inputFileType == 
InputFileType.XLSX ? PASSWORD_PROTECTED : PASSWORD_PROTECTED_OLDER_EXCEL;
+        final InputStream inputStream = new 
ByteArrayInputStream(fileStream.toByteArray());
+        final ExcelRecordReader recordReader = new 
ExcelRecordReader(configuration, inputStream, logger);
+        final List<Record> records = getRecords(recordReader, false, false);
 
         assertEquals(DATA.length, records.size());
     }
 
-    @Test
-    void testPasswordProtectedWithoutPassword() {
-        RecordSchema schema = getPasswordProtectedSchema();
-        ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
+    @ParameterizedTest
+    @EnumSource(InputFileType.class)
+    void testPasswordProtectedWithoutPassword(InputFileType inputFileType) {
+        final RecordSchema schema = getPasswordProtectedSchema();
+        final ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withSchema(schema)
+                .withInputFileType(inputFileType)
                 .build();
 
-        InputStream inputStream = new 
ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray());
+        final ByteArrayOutputStream fileStream = inputFileType == 
InputFileType.XLSX ? PASSWORD_PROTECTED : PASSWORD_PROTECTED_OLDER_EXCEL;
+        final InputStream inputStream = new 
ByteArrayInputStream(fileStream.toByteArray());
         assertThrows(Exception.class, () -> new 
ExcelRecordReader(configuration, inputStream, logger));
     }
 
     private RecordSchema getPasswordProtectedSchema() {
-        return new SimpleRecordSchema(Arrays.asList(new RecordField("id", 
RecordFieldType.INT.getDataType()),
+        return new SimpleRecordSchema(List.of(new RecordField("id", 
RecordFieldType.INT.getDataType()),
                 new RecordField("name", 
RecordFieldType.STRING.getDataType())));
     }
 
     @Test
     void testWithNumberColumnWhoseValueIsEmptyString() throws Exception {
-        final RecordSchema schema = new SimpleRecordSchema(Arrays.asList(new 
RecordField("first", RecordFieldType.STRING.getDataType()),
+        final RecordSchema schema = new SimpleRecordSchema(List.of(new 
RecordField("first", RecordFieldType.STRING.getDataType()),
                 new RecordField("second", 
RecordFieldType.LONG.getDataType())));
         final ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withSchema(schema)
+                .withInputFileType(InputFileType.XLSX)
                 .build();
 
         final Object[][] data = {{"Manny", ""}};
-        final InputStream workbook = createWorkbook(data);
+        final InputStream workbook = createWorkbook(data, XSSFWorkbook::new);
         final ExcelRecordReader recordReader = new 
ExcelRecordReader(configuration, workbook, logger);
 
         assertDoesNotThrow(() -> getRecords(recordReader, true, true));
@@ -355,9 +371,10 @@ public class TestExcelRecordReader {
         final RecordSchema schema = new SimpleRecordSchema(List.of(new 
RecordField(fieldName, RecordFieldType.STRING.getDataType())));
         final ExcelRecordReaderConfiguration configuration = new 
ExcelRecordReaderConfiguration.Builder()
                 .withSchema(schema)
+                .withInputFileType(InputFileType.XLSX)
                 .build();
         final Object[][] data = {{9876543210L}};
-        final InputStream workbook = createWorkbook(data);
+        final InputStream workbook = createWorkbook(data, XSSFWorkbook::new);
         final ExcelRecordReader recordReader = new 
ExcelRecordReader(configuration, workbook, logger);
         final List<Record> records = getRecords(recordReader, true, true);
         final Record firstRecord = records.getFirst();
@@ -366,10 +383,10 @@ public class TestExcelRecordReader {
         assertEquals(scientificNotationNumber, 
firstRecord.getAsString(fieldName));
     }
 
-    private static InputStream createWorkbook(Object[][] data) throws 
Exception {
+    private static InputStream createWorkbook(Object[][] data, 
Supplier<Workbook> workbookSupplier) throws Exception {
         final ByteArrayOutputStream workbookOutputStream = new 
ByteArrayOutputStream();
-        try (XSSFWorkbook workbook = new XSSFWorkbook()) {
-            final XSSFSheet sheet = workbook.createSheet("SomeSheetName");
+        try (Workbook workbook = workbookSupplier.get()) {
+            final Sheet sheet = workbook.createSheet("SomeSheetName");
             populateSheet(sheet, data);
             workbook.write(workbookOutputStream);
         }
@@ -377,7 +394,34 @@ public class TestExcelRecordReader {
         return new ByteArrayInputStream(workbookOutputStream.toByteArray());
     }
 
-    private static void populateSheet(XSSFSheet sheet, Object[][] data) {
+    private static void createPasswordProtectedWorkbook() throws Exception {
+        // Generate an Excel file and populate it with data
+        final InputStream workbook = createWorkbook(DATA, XSSFWorkbook::new);
+
+        // Protect the Excel file with a password
+        try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) {
+            EncryptionInfo encryptionInfo = new 
EncryptionInfo(EncryptionMode.agile);
+            Encryptor encryptor = encryptionInfo.getEncryptor();
+            encryptor.confirmPassword(PASSWORD);
+
+            try (OPCPackage opc = OPCPackage.open(workbook);
+                 OutputStream os = encryptor.getDataStream(poifsFileSystem)) {
+                opc.save(os);
+            }
+            poifsFileSystem.writeFilesystem(PASSWORD_PROTECTED);
+        }
+    }
+
+    private static void createPasswordProtectedOlderExcelWorkbook() throws 
Exception {
+        Biff8EncryptionKey.setCurrentUserPassword(PASSWORD);
+        try (final InputStream inputStream = createWorkbook(DATA, 
HSSFWorkbook::new)) {
+            inputStream.transferTo(PASSWORD_PROTECTED_OLDER_EXCEL);
+        } finally {
+            Biff8EncryptionKey.setCurrentUserPassword(null);
+        }
+    }
+
+    private static void populateSheet(Sheet sheet, Object[][] data) {
         //Adding the data to the Excel worksheet
         int rowCount = 0;
         for (Object[] dataRow : data) {

Reply via email to