This is an automated email from the ASF dual-hosted git repository.
mthomsen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push:
new e9b532bd32 NIFI-12100 Removed the ConvertExcelToCSVProcessor
e9b532bd32 is described below
commit e9b532bd3237fb23b351471586d0f8aebab16d6b
Author: dan-s1 <[email protected]>
AuthorDate: Wed Sep 27 18:13:46 2023 +0000
NIFI-12100 Removed the ConvertExcelToCSVProcessor
This closes #7802
Signed-off-by: Mike Thomsen <[email protected]>
---
.../nifi-poi-bundle/nifi-poi-nar/pom.xml | 5 -
.../nifi-poi-bundle/nifi-poi-processors/pom.xml | 76 ---
.../processors/poi/ConvertExcelToCSVProcessor.java | 534 -------------------
.../services/org.apache.nifi.processor.Processor | 15 -
.../additionalDetails.html | 97 ----
.../poi/ConvertExcelToCSVProcessorTest.java | 578 ---------------------
.../src/test/resources/CollegeScorecard.xlsx | Bin 16553 -> 0 bytes
.../src/test/resources/TwoSheets.xlsx | Bin 8987 -> 0 bytes
.../src/test/resources/Unsupported.xls | Bin 26112 -> 0 bytes
.../src/test/resources/dataformatting.xlsx | Bin 10765 -> 0 bytes
.../src/test/resources/logback-test.xml | 32 --
.../src/test/resources/with-blank-cells.csv | 8 -
.../src/test/resources/with-blank-cells.xlsx | Bin 8489 -> 0 bytes
nifi-nar-bundles/nifi-poi-bundle/pom.xml | 1 -
14 files changed, 1346 deletions(-)
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
index e61cd4cc10..e082a25af5 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
@@ -30,11 +30,6 @@
</properties>
<dependencies>
- <dependency>
- <groupId>org.apache.nifi</groupId>
- <artifactId>nifi-poi-processors</artifactId>
- <version>2.0.0-SNAPSHOT</version>
- </dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-poi-services</artifactId>
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
deleted file mode 100644
index 8669c93cc6..0000000000
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
+++ /dev/null
@@ -1,76 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.nifi</groupId>
- <artifactId>nifi-poi-bundle</artifactId>
- <version>2.0.0-SNAPSHOT</version>
- </parent>
-
- <artifactId>nifi-poi-processors</artifactId>
- <packaging>jar</packaging>
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.rat</groupId>
- <artifactId>apache-rat-plugin</artifactId>
- <configuration>
- <excludes combine.children="append">
- <exclude>src/test/resources/with-blank-cells.csv</exclude>
- </excludes>
- </configuration>
- </plugin>
- </plugins>
- </build>
- <dependencies>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-ooxml</artifactId>
- </dependency>
- <dependency>
- <groupId>com.github.pjfanning</groupId>
- <artifactId>excel-streaming-reader</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.logging.log4j</groupId>
- <artifactId>log4j-to-slf4j</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.nifi</groupId>
- <artifactId>nifi-api</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.nifi</groupId>
- <artifactId>nifi-utils</artifactId>
- <version>2.0.0-SNAPSHOT</version>
- </dependency>
- <dependency>
- <groupId>org.apache.nifi</groupId>
- <artifactId>nifi-standard-record-utils</artifactId>
- <version>2.0.0-SNAPSHOT</version>
- </dependency>
- <dependency>
- <groupId>org.apache.nifi</groupId>
- <artifactId>nifi-mock</artifactId>
- </dependency>
- </dependencies>
-</project>
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
deleted file mode 100644
index 362c30b90a..0000000000
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
+++ /dev/null
@@ -1,534 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nifi.processors.poi;
-
-import com.github.pjfanning.xlsx.StreamingReader;
-import com.github.pjfanning.xlsx.exceptions.OpenException;
-import com.github.pjfanning.xlsx.exceptions.ParseException;
-import com.github.pjfanning.xlsx.exceptions.ReadException;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintStream;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVPrinter;
-import org.apache.commons.io.FilenameUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.nifi.annotation.behavior.WritesAttribute;
-import org.apache.nifi.annotation.behavior.WritesAttributes;
-import org.apache.nifi.annotation.documentation.CapabilityDescription;
-import org.apache.nifi.annotation.documentation.Tags;
-import org.apache.nifi.components.PropertyDescriptor;
-import org.apache.nifi.csv.CSVUtils;
-import org.apache.nifi.expression.ExpressionLanguageScope;
-import org.apache.nifi.flowfile.FlowFile;
-import org.apache.nifi.flowfile.attributes.CoreAttributes;
-import org.apache.nifi.processor.AbstractProcessor;
-import org.apache.nifi.processor.ProcessContext;
-import org.apache.nifi.processor.ProcessSession;
-import org.apache.nifi.processor.ProcessorInitializationContext;
-import org.apache.nifi.processor.Relationship;
-import org.apache.nifi.processor.exception.ProcessException;
-import org.apache.nifi.processor.util.StandardValidators;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.ss.usermodel.Cell;
-import org.apache.poi.ss.usermodel.Sheet;
-import org.apache.poi.ss.usermodel.Workbook;
-
-
-@Tags({"excel", "csv", "poi"})
-@CapabilityDescription("Consumes a Microsoft Excel document and converts each
worksheet to csv. Each sheet from the incoming Excel " +
- "document will generate a new Flowfile that will be output from this
processor. Each output Flowfile's contents will be formatted as a csv file " +
- "where the each row from the excel sheet is output as a newline in the
csv file. This processor is currently only capable of processing .xlsx " +
- "(XSSF 2007 OOXML file format) Excel documents and not older .xls
(HSSF '97(-2007) file format) documents. This processor also expects well
formatted " +
- "CSV content and will not escape cell's containing invalid content
such as newlines or additional commas.")
-@WritesAttributes({@WritesAttribute(attribute = "sheetname", description =
"The name of the Excel sheet that this particular row of data came from in the
Excel document"),
- @WritesAttribute(attribute = "numrows", description = "The number of
rows in this Excel Sheet"),
- @WritesAttribute(attribute = "sourcefilename", description = "The name
of the Excel document file that this data originated from"),
- @WritesAttribute(attribute = "convertexceltocsvprocessor.error",
description = "Error message that was encountered on a per Excel sheet basis.
This attribute is" +
- " only populated if an error was occured while processing the
particular sheet. Having the error present at the sheet level will allow for
the end" +
- " user to better understand what syntax errors in their excel
doc on a larger scale caused the error.")})
-public class ConvertExcelToCSVProcessor extends AbstractProcessor {
-
- private static final String CSV_MIME_TYPE = "text/csv";
- public static final String SHEET_NAME = "sheetname";
- public static final String ROW_NUM = "numrows";
- public static final String SOURCE_FILE_NAME = "sourcefilename";
- private static final String DESIRED_SHEETS_DELIMITER = ",";
- private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";
-
- public static final PropertyDescriptor DESIRED_SHEETS = new
PropertyDescriptor
- .Builder().name("extract-sheets")
- .displayName("Sheets to Extract")
- .description("Comma separated list of Excel document sheet names
that should be extracted from the excel document. If this property" +
- " is left blank then all of the sheets will be extracted
from the Excel document. The list of names is case in-sensitive. Any sheets not
" +
- "specified in this value will be ignored. A bulletin will
be generated if a specified sheet(s) are not found.")
- .required(false)
-
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
- .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
- .build();
-
- public static final PropertyDescriptor ROWS_TO_SKIP = new
PropertyDescriptor
- .Builder().name("excel-extract-first-row")
- .displayName("Number of Rows to Skip")
- .description("The row number of the first row to start processing."
- + "Use this to skip over rows of data at the top of your
worksheet that are not part of the dataset."
- + "Empty rows of data anywhere in the spreadsheet will
always be skipped, no matter what this value is set to.")
- .required(true)
- .defaultValue("0")
-
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
- .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
- .build();
-
- public static final PropertyDescriptor COLUMNS_TO_SKIP = new
PropertyDescriptor
- .Builder().name("excel-extract-column-to-skip")
- .displayName("Columns To Skip")
- .description("Comma delimited list of column numbers to skip. Use
the columns number and not the letter designation. "
- + "Use this to skip over columns anywhere in your
worksheet that you don't want extracted as part of the record.")
- .required(false)
-
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
- .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
- .build();
-
- public static final PropertyDescriptor FORMAT_VALUES = new
PropertyDescriptor.Builder()
- .name("excel-format-values")
- .displayName("Format Cell Values")
- .description("Should the cell values be written to CSV using the
formatting applied in Excel, or should they be printed as raw values.")
- .allowableValues("true", "false")
- .defaultValue("false")
- .required(true)
- .build();
-
- public static final Relationship ORIGINAL = new Relationship.Builder()
- .name("original")
- .description("Original Excel document received by this processor")
- .build();
-
- public static final Relationship SUCCESS = new Relationship.Builder()
- .name("success")
- .description("Excel data converted to csv")
- .build();
-
- public static final Relationship FAILURE = new Relationship.Builder()
- .name("failure")
- .description("Failed to parse the Excel document")
- .build();
-
- private List<PropertyDescriptor> descriptors;
-
- private Set<Relationship> relationships;
-
- @Override
- protected void init(final ProcessorInitializationContext context) {
- final List<PropertyDescriptor> descriptors = new ArrayList<>();
- descriptors.add(DESIRED_SHEETS);
- descriptors.add(ROWS_TO_SKIP);
- descriptors.add(COLUMNS_TO_SKIP);
- descriptors.add(FORMAT_VALUES);
-
- descriptors.add(CSVUtils.CSV_FORMAT);
- descriptors.add(CSVUtils.VALUE_SEPARATOR);
- descriptors.add(CSVUtils.INCLUDE_HEADER_LINE);
- descriptors.add(CSVUtils.QUOTE_CHAR);
- descriptors.add(CSVUtils.ESCAPE_CHAR);
- descriptors.add(CSVUtils.COMMENT_MARKER);
- descriptors.add(CSVUtils.NULL_STRING);
- descriptors.add(CSVUtils.TRIM_FIELDS);
- descriptors.add(new PropertyDescriptor.Builder()
- .fromPropertyDescriptor(CSVUtils.QUOTE_MODE)
- .defaultValue(CSVUtils.QUOTE_NONE.getValue())
- .build());
- descriptors.add(CSVUtils.RECORD_SEPARATOR);
- descriptors.add(CSVUtils.TRAILING_DELIMITER);
- this.descriptors = Collections.unmodifiableList(descriptors);
-
- final Set<Relationship> relationships = new LinkedHashSet<>();
- relationships.add(ORIGINAL);
- relationships.add(SUCCESS);
- relationships.add(FAILURE);
- this.relationships = Collections.unmodifiableSet(relationships);
- }
-
- @Override
- public Set<Relationship> getRelationships() {
- return this.relationships;
- }
-
- @Override
- public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
- return descriptors;
- }
-
- @Override
- public void onTrigger(final ProcessContext context, final ProcessSession
session) throws ProcessException {
- final FlowFile flowFile = session.get();
- if (flowFile == null) {
- return;
- }
-
- final Map<String, Boolean> desiredSheets = getDesiredSheets(context,
flowFile);
- final boolean formatValues =
context.getProperty(FORMAT_VALUES).asBoolean();
- final CSVFormat csvFormat = CSVUtils.createCSVFormat(context,
flowFile.getAttributes());
-
- //Switch to 0 based index
- final int firstRow = context.getProperty(ROWS_TO_SKIP).evaluateAttributeExpressions(flowFile).asInteger() - 1;
- final List<Integer> columnsToSkip = getColumnsToSkip(context,
flowFile);
-
- try {
- session.read(flowFile, inputStream -> {
- try (Workbook workbook = StreamingReader.builder()
- .rowCacheSize(100)
- .bufferSize(4096)
- .setReadStyles(formatValues)
- .open(inputStream)) {
-
- if (!desiredSheets.isEmpty()) {
- desiredSheets.keySet().forEach(desiredSheet ->
workbook.forEach(sheet -> {
- if
(sheet.getSheetName().equalsIgnoreCase(desiredSheet)) {
- ExcelSheetReadConfig readConfig = new
ExcelSheetReadConfig(columnsToSkip, firstRow, sheet.getSheetName());
- handleExcelSheet(session, flowFile, sheet,
readConfig, csvFormat);
- desiredSheets.put(desiredSheet, Boolean.TRUE);
- }
- }));
-
- String sheetsNotFound =
getSheetsNotFound(desiredSheets);
- if (!sheetsNotFound.isEmpty()) {
- getLogger().warn("Excel sheet(s) not found: {}",
sheetsNotFound);
- }
- } else {
- workbook.forEach(sheet -> {
- ExcelSheetReadConfig readConfig = new
ExcelSheetReadConfig(columnsToSkip, firstRow, sheet.getSheetName());
- handleExcelSheet(session, flowFile, sheet,
readConfig, csvFormat);
- });
- }
- } catch (ParseException | OpenException | ReadException e) {
- if (e.getCause() instanceof InvalidFormatException) {
- String msg = "Only .xlsx Excel 2007 OOXML files are
supported";
- getLogger().error(msg, e);
- throw new UnsupportedOperationException(msg, e);
- }
- getLogger().error("Error occurred while processing Excel
document metadata", e);
- }
- });
-
- session.transfer(flowFile, ORIGINAL);
-
- } catch (RuntimeException ex) {
- getLogger().error("Failed to process incoming Excel document. " +
ex.getMessage(), ex);
- FlowFile failedFlowFile = session.putAttribute(flowFile,
- ConvertExcelToCSVProcessor.class.getName() + ".error",
ex.getMessage());
- session.transfer(failedFlowFile, FAILURE);
- }
- }
-
- private List<Integer> getColumnsToSkip(final ProcessContext context,
FlowFile flowFile) {
- final String[] columnsToSkip =
StringUtils.split(context.getProperty(COLUMNS_TO_SKIP)
- .evaluateAttributeExpressions(flowFile).getValue(), ",");
-
- if (columnsToSkip != null) {
- try {
- return Arrays.stream(columnsToSkip)
- .map(columnToSkip -> Integer.parseInt(columnToSkip) - 1)
- .collect(Collectors.toList());
- } catch (NumberFormatException e) {
- throw new ProcessException("Invalid column in Columns to Skip
list.", e);
- }
- }
-
- return new ArrayList<>();
- }
-
- private Map<String, Boolean> getDesiredSheets(final ProcessContext
context, FlowFile flowFile) {
- final String desiredSheetsDelimited =
context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue();
- if (desiredSheetsDelimited != null) {
- String[] desiredSheets = StringUtils.split(desiredSheetsDelimited,
DESIRED_SHEETS_DELIMITER);
- if (desiredSheets != null) {
- return Arrays.stream(desiredSheets)
- .collect(Collectors.toMap(key -> key, value ->
Boolean.FALSE));
- } else {
- getLogger().debug("Excel document was parsed but no sheets
with the specified desired names were found.");
- }
- }
-
- return new HashMap<>();
- }
-
- /**
- * Handles an individual Excel sheet from the entire Excel document. Each
sheet will result in an individual flowfile.
- *
- * @param session The NiFi ProcessSession instance for the current
invocation.
- */
- private void handleExcelSheet(ProcessSession session, FlowFile
originalParentFF, final Sheet sheet, ExcelSheetReadConfig readConfig,
- CSVFormat csvFormat) {
-
- FlowFile ff = session.create(originalParentFF);
- final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);
- try {
- ff = session.write(ff, out -> {
- PrintStream outPrint = new PrintStream(out, false,
StandardCharsets.UTF_8);
- sheetHandler.setOutput(outPrint);
- sheet.forEach(row -> {
- sheetHandler.startRow(row.getRowNum());
- row.forEach(sheetHandler::cell);
- sheetHandler.endRow();
- });
- sheetHandler.close();
- });
-
- ff = session.putAttribute(ff, SHEET_NAME,
readConfig.getSheetName());
- ff = session.putAttribute(ff, ROW_NUM,
Long.toString(sheetHandler.getRowCount()));
-
- if
(StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key())))
{
- ff = session.putAttribute(ff, SOURCE_FILE_NAME,
originalParentFF.getAttribute(CoreAttributes.FILENAME.key()));
- } else {
- ff = session.putAttribute(ff, SOURCE_FILE_NAME,
UNKNOWN_SHEET_NAME);
- }
-
- //Update the CoreAttributes.FILENAME to have the .csv extension
now. Also update MIME.TYPE
- ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(),
updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
- ff.getAttribute(CoreAttributes.FILENAME.key()),
readConfig.getSheetName()));
- ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(),
CSV_MIME_TYPE);
-
- session.transfer(ff, SUCCESS);
-
- } catch (RuntimeException e) {
- ff = session.putAttribute(ff,
ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage());
- session.transfer(ff, FAILURE);
- }
- }
-
- private String getSheetsNotFound(Map<String, Boolean> desiredSheets) {
- return desiredSheets.entrySet().stream()
- .filter(entry -> !entry.getValue())
- .map(Map.Entry::getKey)
- .collect(Collectors.joining(","));
- }
-
- /**
- * Uses the com.github.pjfanning streaming cell implementation to
- * do most of the work of parsing the contents of the Excel sheet
- * and outputs the contents as a (basic) CSV.
- */
- private class SheetToCSV {
- private final ExcelSheetReadConfig readConfig;
- CSVFormat csvFormat;
- private boolean firstCellOfRow;
- private boolean skipRow;
- private int currentRow = -1;
- private int currentCol = -1;
- private int rowCount = 0;
- private int skippedColumns = 0;
- private CSVPrinter printer;
- private boolean firstRow = false;
- private ArrayList<String> fieldValues;
-
- public int getRowCount() {
- return rowCount;
- }
-
- public void setOutput(PrintStream output) {
- final OutputStreamWriter streamWriter = new
OutputStreamWriter(output, StandardCharsets.UTF_8);
-
- try {
- printer = new CSVPrinter(streamWriter, csvFormat);
- } catch (IOException e) {
- throw new ProcessException("Failed to create CSV Printer.", e);
- }
- }
-
- public SheetToCSV(ExcelSheetReadConfig readConfig, CSVFormat
csvFormat) {
- this.readConfig = readConfig;
- this.csvFormat = csvFormat;
- }
-
- public void startRow(int rowNum) {
- if (rowNum <= readConfig.getOverrideFirstRow()) {
- skipRow = true;
- return;
- }
-
- // Prepare for this row
- skipRow = false;
- firstCellOfRow = true;
- firstRow = currentRow == -1;
- currentRow = rowNum;
- currentCol = -1;
- fieldValues = new ArrayList<>();
- }
-
- public void endRow() {
- if(skipRow) {
- return;
- }
-
- if(firstRow) {
- readConfig.setLastColumn(currentCol);
- }
-
- //if there was no data in this row, don't write it
- if(fieldValues.stream()
- .noneMatch(string -> string != null && !string.isEmpty()))
{
- return;
- }
-
- // Ensure the correct number of columns
- int columnsToAdd = (readConfig.getLastColumn() - currentCol) -
readConfig.getColumnsToSkip().size();
- for (int i = 0; i < columnsToAdd; i++) {
- fieldValues.add(null);
- }
-
- try {
- printer.printRecord(fieldValues);
- } catch (IOException e) {
- getLogger().warn("Print Record failed", e);
- }
-
- rowCount++;
- }
-
- public void cell(Cell cell) {
- if (skipRow) {
- return;
- }
-
- // Did we miss any cells?
- int thisCol = cell.getColumnIndex();
-
- //Use the first row of the file to decide on the area of data to
export
- if (firstRow && firstCellOfRow) {
- readConfig.setFirstColumn(thisCol);
- }
-
- //if this cell falls outside our area, or has been explicitly
marked as a skipped column, return and don't write it out.
- if (!firstRow && (thisCol < readConfig.getFirstColumn() || thisCol
> readConfig.getLastColumn())) {
- return;
- }
-
- if (readConfig.getColumnsToSkip().contains(thisCol)) {
- skippedColumns++;
- return;
- }
-
- int missedCols = (thisCol - readConfig.getFirstColumn()) -
(currentCol - readConfig.getFirstColumn()) - 1;
- if (firstCellOfRow) {
- missedCols = (thisCol - readConfig.getFirstColumn());
- }
-
- missedCols -= skippedColumns;
-
- if (firstCellOfRow) {
- firstCellOfRow = false;
- }
-
- for (int i = 0; i < missedCols; i++) {
- fieldValues.add(null);
- }
- currentCol = thisCol;
-
- String stringCellValue = cell.getStringCellValue();
- fieldValues.add(stringCellValue != null &&
!stringCellValue.isEmpty() ? stringCellValue : null);
-
- skippedColumns = 0;
- }
-
- public void close() throws IOException {
- printer.close();
- }
- }
-
- /**
- * Takes the original input filename and updates it by removing the file
extension and replacing it with
- * the .csv extension.
- *
- * @param origFileName Original filename from the input file.
- * @return The new filename with the .csv extension that should be place
in the output flowfile's attributes
- */
- private String updateFilenameToCSVExtension(String nifiUUID, String
origFileName, String sheetName) {
-
- StringBuilder stringBuilder = new StringBuilder();
-
- if (StringUtils.isNotEmpty(origFileName)) {
- String ext = FilenameUtils.getExtension(origFileName);
- if (StringUtils.isNotEmpty(ext)) {
- stringBuilder.append(StringUtils.replace(origFileName, ("." +
ext), ""));
- } else {
- stringBuilder.append(origFileName);
- }
- } else {
- stringBuilder.append(nifiUUID);
- }
-
- stringBuilder.append("_");
- stringBuilder.append(sheetName);
- stringBuilder.append(".");
- stringBuilder.append("csv");
-
- return stringBuilder.toString();
- }
-
- private static class ExcelSheetReadConfig {
- public String getSheetName() {
- return sheetName;
- }
-
- public int getFirstColumn() {
- return firstColumn;
- }
-
- public void setFirstColumn(int value) {
- this.firstColumn = value;
- }
-
- public int getLastColumn() {
- return lastColumn;
- }
-
- public void setLastColumn(int lastColumn) {
- this.lastColumn = lastColumn;
- }
-
- public int getOverrideFirstRow() {
- return overrideFirstRow;
- }
-
- public List<Integer> getColumnsToSkip() {
- return columnsToSkip;
- }
-
- private int firstColumn;
- private int lastColumn;
- private final int overrideFirstRow;
- private final String sheetName;
- private final List<Integer> columnsToSkip;
-
- public ExcelSheetReadConfig(List<Integer> columnsToSkip, int
overrideFirstRow, String sheetName) {
-
- this.sheetName = sheetName;
- this.columnsToSkip = columnsToSkip;
- this.overrideFirstRow = overrideFirstRow;
- }
- }
-}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
deleted file mode 100644
index 43baa0b62f..0000000000
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
+++ /dev/null
@@ -1,15 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/docs/org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor/additionalDetails.html b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/docs/org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor/additionalDetails.html
deleted file mode 100644
index fbefa08015..0000000000
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/docs/org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor/additionalDetails.html
+++ /dev/null
@@ -1,97 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<head>
- <meta charset="utf-8" />
- <title>ConvertExcelToCSVProcessor</title>
- <style>
-table {
- border-collapse: collapse;
-}
-
-table, th, td {
- border: 1px solid #ccc;
-}
-
-td.r {
- text-align: right;
-}
-
-td {
- width: 50px;
- padding: 5px;
-}
- </style>
- <link rel="stylesheet" href="../../../../../css/component-usage.css"
type="text/css" />
-</head>
-
-<body>
-<h2>How it extracts CSV data from a sheet</h2>
-<p>
- ConvertExcelToCSVProcessor extracts CSV data with following rules:
-</p>
-<ul>
- <li>Find the fist cell which has a value in it (the FirstCell).</li>
- <li>Scan cells in the first row, starting from the FirstCell,
- until it reaches to a cell after which no cell with a value can not be
found in the row (the FirstRowLastCell).</li>
- <li>Process the 2nd row and later, from the column of FirstCell to the
column of FirstRowLastCell.</li>
- <li>If a row does not have any cell that has a value, then the row is
ignored.</li>
-</ul>
-
-<p>
- As an example, the sheet shown below will be:
-</p>
-
-<table>
- <tbody>
- <tr><th>row
</th><th>A</th><th>B</th><th>C</th><th>D</th><th>E</th><th>F</th><th>G</th></tr>
- <tr><td class="r"> 1</td><td> </td><td> </td><td> </td><td> </td><td>
</td><td> </td><td> </td></tr>
- <tr><td class="r"> 2</td><td> </td><td>
</td><td>x</td><td>y</td><td>z</td><td> </td><td> </td></tr>
- <tr><td class="r"> 3</td><td> </td><td> </td><td>1</td><td> </td><td>
</td><td> </td><td> </td></tr>
- <tr><td class="r"> 4</td><td>2</td><td> </td><td> </td><td>3</td><td>
</td><td> </td><td> </td></tr>
- <tr><td class="r"> 5</td><td> </td><td> </td><td> </td><td>
</td><td>4</td><td> </td><td> </td></tr>
- <tr><td class="r"> 6</td><td> </td><td>
</td><td>5</td><td>6</td><td>7</td><td> </td><td> </td></tr>
- <tr><td class="r"> 7</td><td> </td><td> </td><td> </td><td> </td><td>
</td><td>8</td><td> </td></tr>
- <tr><td class="r"> 8</td><td> </td><td> </td><td> </td><td> </td><td>
</td><td> </td><td> </td></tr>
- <tr><td class="r"> 9</td><td> </td><td> </td><td> </td><td>
</td><td>9</td><td> </td><td> </td></tr>
- <tr><td class="r"> 10</td><td> </td><td> </td><td> </td><td> </td><td>
</td><td> </td><td> </td></tr>
- <tr><td class="r"> 11</td><td> </td><td> </td><td> </td><td> </td><td>
</td><td> </td><td> </td></tr>
- </tbody>
-</table>
-
-<p>
- converted to following CSV:
-</p>
-
-<pre>
-x,y,z
-1,,
-,3,
-,,4
-5,6,7
-,,9
-</pre>
-
-<ul>
- <li>C2(x) is the FirstCell, and E2(z) is the FirstRowLastCell.</li>
- <li>A4(2) is ignored because it is out of range. So is F7(8).</li>
- <li>Row 7 and 8 are ignored because those do not have a valid cell.</li>
- <li>It is important to have a header row as shown in the example to define
data area,
- especially when a sheet includes empty cells.</li>
-</ul>
-
-</body>
-</html>
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
deleted file mode 100644
index 41b48921e8..0000000000
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
+++ /dev/null
@@ -1,578 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nifi.processors.poi;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.text.DecimalFormatSymbols;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.nifi.csv.CSVUtils;
-import org.apache.nifi.flowfile.attributes.CoreAttributes;
-import org.apache.nifi.util.LogMessage;
-import org.apache.nifi.util.MockFlowFile;
-import org.apache.nifi.util.TestRunner;
-import org.apache.nifi.util.TestRunners;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-public class ConvertExcelToCSVProcessorTest {
-
- private TestRunner testRunner;
-
- @BeforeEach
- public void init() {
- testRunner =
TestRunners.newTestRunner(ConvertExcelToCSVProcessor.class);
- }
-
- @Test
- public void testMultipleSheetsGeneratesMultipleFlowFiles() throws
IOException {
-
- Map<String, String> attributes = new HashMap<>();
- attributes.put("test", "attribute");
-
- final URL resourceUrl = getClass().getResource("/TwoSheets.xlsx");
- assertNotNull(resourceUrl);
-
- testRunner.enqueue(new File(resourceUrl.getPath()).toPath(),
attributes);
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ffSheetA =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheetA =
Long.parseLong(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(4, rowsSheetA);
-
assertTrue(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetA"));
- assertEquals("TwoSheets.xlsx",
ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME));
-
- //Since TestRunner.run() will create a random filename even if the
attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is
present
-
assertTrue(ffSheetA.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetA.csv"));
- assertEquals("attribute", ffSheetA.getAttribute("test"));
-
- MockFlowFile ffSheetB =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1);
- long rowsSheetB =
Long.parseLong(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(3, rowsSheetB);
-
assertTrue(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetB"));
- assertEquals("TwoSheets.xlsx",
ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME));
-
- //Since TestRunner.run() will create a random filename even if the
attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is
present
-
assertTrue(ffSheetB.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetB.csv"));
- assertEquals("attribute", ffSheetB.getAttribute("test"));
-
- }
-
- @Test
- public void testDataFormatting() {
-
testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"));
-
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"false");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(9, rowsSheet);
-
- ff.assertContentEquals("Numbers,Timestamps,Money\n" +
- "1234.4559999999999,42736.5,123.45\n" +
- "1234.4559999999999,42736.5,123.45\n" +
- "1234.4559999999999,42736.5,123.45\n" +
- "1234.4559999999999,42736.5,1023.45\n" +
- "1234.4559999999999,42736.5,1023.45\n" +
- "987654321,42736.5,1023.45\n" +
- "987654321,,\n" +
- "987654321,,\n");
- }
-
- @Test
- public void testQuoting() {
-
testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"));
-
- testRunner.setProperty(CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_MINIMAL);
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(9, rowsSheet);
-
- LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- char decimalSeparator = decimalFormatSymbols.getDecimalSeparator();
- char groupingSeparator = decimalFormatSymbols.getGroupingSeparator();
- ff.assertContentEquals(("Numbers,Timestamps,Money\n" +
- addQuotingIfNeeded(String.format("1234%1$s456",
decimalSeparator)) + "," +
DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "," +
- addQuotingIfNeeded(String.format("$ 123%1$s45",
decimalSeparator)) + "\n" +
- addQuotingIfNeeded(String.format("1234%1$s46",
decimalSeparator)) + "," + DateTimeFormatter.ofPattern("hh:mm:ss
a").format(localDt) + "," +
- addQuotingIfNeeded(String.format("£ 123%1$s45",
decimalSeparator)) + "\n" +
- addQuotingIfNeeded(String.format("1234%1$s5",
decimalSeparator)) + ",\"" + DateTimeFormatter.ofPattern("EEEE, MMMM dd,
yyyy").format(localDt) + "\"," +
- addQuotingIfNeeded(String.format("¥ 123%1$s45",
decimalSeparator)) + "\n" +
- addQuotingIfNeeded(String.format("1%2$s234%1$s46",
decimalSeparator, groupingSeparator)) + "," +
DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "," +
- addQuotingIfNeeded(String.format("$ 1%2$s023%1$s45",
decimalSeparator, groupingSeparator)) + "\n" +
- addQuotingIfNeeded(String.format("1%2$s234%1$s4560",
decimalSeparator, groupingSeparator)) + "," +
DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "," +
- addQuotingIfNeeded(String.format("£ 1%2$s023%1$s45",
decimalSeparator, groupingSeparator)) + "\n" +
- addQuotingIfNeeded(String.format("9%1$s88E+08",
decimalSeparator)) + "," + DateTimeFormatter.ofPattern("yyyy/MM/dd/
HH:mm").format(localDt) + "," +
- addQuotingIfNeeded(String.format("¥ 1%2$s023%1$s45",
decimalSeparator, groupingSeparator)) + "\n" +
- addQuotingIfNeeded(String.format("9%1$s877E+08",
decimalSeparator)) + ",,\n" +
- addQuotingIfNeeded(String.format("9%1$s8765E+08",
decimalSeparator)) + ",,\n").replace("E+",
getExponentSeparator(decimalFormatSymbols)));
- }
-
- /**
- * Workaround for interaction between {@link DecimalFormatSymbols} and use
of custom {@link java.util.Locale}.
- */
- private static String getExponentSeparator(final DecimalFormatSymbols
decimalFormatSymbols) {
- final String exponentSeparator =
decimalFormatSymbols.getExponentSeparator();
- return (exponentSeparator.equals("e") ? "e" : exponentSeparator + "+");
- }
-
- @Test
- public void testSkipRows() {
-
testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"));
-
- testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP, "2");
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(7, rowsSheet, "Row count does match expected value.");
-
- LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- String decimalSeparator = decimalFormatSymbols.getDecimalSeparator()
== ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
- String groupingSeparator = decimalFormatSymbols.getGroupingSeparator()
== ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
- ff.assertContentEquals(String.format("1234%1$s46," +
DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123%1$s45\n" +
- "1234%1$s5," + DateTimeFormatter.ofPattern("EEEE\\, MMMM dd\\,
yyyy").format(localDt) + ",¥ 123%1$s45\n" +
- "1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy
HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" +
- "1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm
a").format(localDt) + ",£ 1%2$s023%1$s45\n" +
- "9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/
HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" +
- "9%1$s877E+08,,\n" +
- "9%1$s8765E+08,,\n", decimalSeparator,
groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
- }
-
- @Test
- public void testSkipRowsWithEL() {
- Map<String, String> attributes = new HashMap<>();
- attributes.put("rowsToSkip", "2");
-
testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"),
attributes);
-
- testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP,
"${rowsToSkip}");
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(7, rowsSheet, "Row count does match expected value.");
-
- LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- String decimalSeparator = decimalFormatSymbols.getDecimalSeparator()
== ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
- String groupingSeparator = decimalFormatSymbols.getGroupingSeparator()
== ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
- ff.assertContentEquals(String.format("1234%1$s46," +
DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123%1$s45\n" +
- "1234%1$s5," + DateTimeFormatter.ofPattern("EEEE\\, MMMM dd\\,
yyyy").format(localDt) + ",¥ 123%1$s45\n" +
- "1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy
HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" +
- "1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm
a").format(localDt) + ",£ 1%2$s023%1$s45\n" +
- "9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/
HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" +
- "9%1$s877E+08,,\n" +
- "9%1$s8765E+08,,\n", decimalSeparator,
groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
- }
-
- @Test
- public void testSkipColumns() throws Exception {
- testRunner.enqueue(new
File("src/test/resources/dataformatting.xlsx").toPath());
-
- testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP,
"2");
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(9, rowsSheet);
-
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- String decimalSeparator = decimalFormatSymbols.getDecimalSeparator()
== ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
- String groupingSeparator = decimalFormatSymbols.getGroupingSeparator()
== ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
- ff.assertContentEquals(String.format("Numbers,Money\n" +
- "1234%1$s456,$ 123%1$s45\n" +
- "1234%1$s46,£ 123%1$s45\n" +
- "1234%1$s5,¥ 123%1$s45\n" +
- "1%2$s234%1$s46,$ 1%2$s023%1$s45\n" +
- "1%2$s234%1$s4560,£ 1%2$s023%1$s45\n" +
- "9%1$s88E+08,¥ 1%2$s023%1$s45\n" +
- "9%1$s877E+08,\n" +
- "9%1$s8765E+08,\n", decimalSeparator,
groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
- }
-
- @Test
- public void testSkipColumnsWithEL() throws Exception {
- Map<String, String> attributes = new HashMap<>();
- attributes.put("columnsToSkip", "2");
- testRunner.enqueue(new
File("src/test/resources/dataformatting.xlsx").toPath(), attributes);
-
- testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP,
"${columnsToSkip}");
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(9, rowsSheet);
-
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- String decimalSeparator = decimalFormatSymbols.getDecimalSeparator()
== ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
- String groupingSeparator = decimalFormatSymbols.getGroupingSeparator()
== ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
- ff.assertContentEquals(String.format("Numbers,Money\n" +
- "1234%1$s456,$ 123%1$s45\n" +
- "1234%1$s46,£ 123%1$s45\n" +
- "1234%1$s5,¥ 123%1$s45\n" +
- "1%2$s234%1$s46,$ 1%2$s023%1$s45\n" +
- "1%2$s234%1$s4560,£ 1%2$s023%1$s45\n" +
- "9%1$s88E+08,¥ 1%2$s023%1$s45\n" +
- "9%1$s877E+08,\n" +
- "9%1$s8765E+08,\n", decimalSeparator,
groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
- }
-
- @Test
- public void testCustomDelimiters() throws Exception {
- testRunner.enqueue(new
File("src/test/resources/dataformatting.xlsx").toPath());
-
- testRunner.setProperty(CSVUtils.VALUE_SEPARATOR, "|");
- testRunner.setProperty(CSVUtils.RECORD_SEPARATOR, "\\r\\n");
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(9, rowsSheet);
-
- LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- String valueSeparator =
testRunner.getProcessContext().getProperty(CSVUtils.VALUE_SEPARATOR).evaluateAttributeExpressions(ff).getValue();
- String decimalSeparator =
(String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(valueSeparator))
- ? ("\\" + decimalFormatSymbols.getDecimalSeparator()) :
String.valueOf(decimalFormatSymbols.getDecimalSeparator());
- String groupingSeparator =
String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(valueSeparator)
- ? "\\" + decimalFormatSymbols.getGroupingSeparator() :
String.valueOf(decimalFormatSymbols.getGroupingSeparator());
- ff.assertContentEquals(String.format("Numbers|Timestamps|Money\r\n" +
- "1234%1$s456|" +
DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "|$ 123%1$s45\r\n" +
- "1234%1$s46|" + DateTimeFormatter.ofPattern("hh:mm:ss
a").format(localDt) + "|£ 123%1$s45\r\n" +
- "1234%1$s5|" + DateTimeFormatter.ofPattern("EEEE, MMMM dd,
yyyy").format(localDt) + "|¥ 123%1$s45\r\n" +
- "1%2$s234%1$s46|" + DateTimeFormatter.ofPattern("d/M/yy
HH:mm").format(localDt) + "|$ 1%2$s023%1$s45\r\n" +
- "1%2$s234%1$s4560|" + DateTimeFormatter.ofPattern("hh:mm
a").format(localDt) + "|£ 1%2$s023%1$s45\r\n" +
- "9%1$s88E+08|" + DateTimeFormatter.ofPattern("yyyy/MM/dd/
HH:mm").format(localDt) + "|¥ 1%2$s023%1$s45\r\n" +
- "9%1$s877E+08||\r\n" +
- "9%1$s8765E+08||\r\n", decimalSeparator,
groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
- }
-
- @Test
- public void testCustomValueSeparatorWithEL() throws Exception {
- Map<String, String> attributes = new HashMap<>();
- attributes.put("csv.delimiter", "|");
- testRunner.enqueue(new
File("src/test/resources/dataformatting.xlsx").toPath(), attributes);
-
- testRunner.setProperty(CSVUtils.VALUE_SEPARATOR, "${csv.delimiter}");
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(9, rowsSheet);
-
- LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- String valueSeparator =
testRunner.getProcessContext().getProperty(CSVUtils.VALUE_SEPARATOR).evaluateAttributeExpressions(ff).getValue();
- String decimalSeparator =
(String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(valueSeparator))
- ? ("\\" + decimalFormatSymbols.getDecimalSeparator()) :
String.valueOf(decimalFormatSymbols.getDecimalSeparator());
- String groupingSeparator =
String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(valueSeparator)
- ? "\\" + decimalFormatSymbols.getGroupingSeparator() :
String.valueOf(decimalFormatSymbols.getGroupingSeparator());
- ff.assertContentEquals(String.format("Numbers|Timestamps|Money\n" +
- "1234%1$s456|" +
DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "|$ 123%1$s45\n" +
- "1234%1$s46|" + DateTimeFormatter.ofPattern("hh:mm:ss
a").format(localDt) + "|£ 123%1$s45\n" +
- "1234%1$s5|" + DateTimeFormatter.ofPattern("EEEE, MMMM dd,
yyyy").format(localDt) + "|¥ 123%1$s45\n" +
- "1%2$s234%1$s46|" + DateTimeFormatter.ofPattern("d/M/yy
HH:mm").format(localDt) + "|$ 1%2$s023%1$s45\n" +
- "1%2$s234%1$s4560|" + DateTimeFormatter.ofPattern("hh:mm
a").format(localDt) + "|£ 1%2$s023%1$s45\n" +
- "9%1$s88E+08|" + DateTimeFormatter.ofPattern("yyyy/MM/dd/
HH:mm").format(localDt) + "|¥ 1%2$s023%1$s45\n" +
- "9%1$s877E+08||\n" +
- "9%1$s8765E+08||\n", decimalSeparator,
groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
- }
-
- @Test
- public void testCustomQuoteCharWithEL() throws Exception {
- Map<String, String> attributes = new HashMap<>();
- attributes.put("csv.quote", "'");
- testRunner.enqueue(new
File("src/test/resources/dataformatting.xlsx").toPath(), attributes);
-
- testRunner.setProperty(CSVUtils.QUOTE_CHAR, "${csv.quote}");
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
- testRunner.setProperty(CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_ALL);
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(9, rowsSheet);
-
- LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
- String quoteCharValue =
testRunner.getProcessContext().getProperty(CSVUtils.QUOTE_CHAR).evaluateAttributeExpressions(ff).getValue();
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- char decimalSeparator = decimalFormatSymbols.getDecimalSeparator();
- char groupingSeparator = decimalFormatSymbols.getGroupingSeparator();
- ff.assertContentEquals(("'Numbers','Timestamps','Money'\n" +
- addQuotingIfNeeded(String.format("1234%1$s456",
decimalSeparator), quoteCharValue, true) + "," + quoteCharValue +
- DateTimeFormatter.ofPattern("d/M/yy").format(localDt) +
quoteCharValue + "," +
- addQuotingIfNeeded(String.format("$ 123%1$s45",
decimalSeparator), quoteCharValue, true) + "\n" +
- addQuotingIfNeeded(String.format("1234%1$s46",
decimalSeparator), quoteCharValue, true) + "," + quoteCharValue +
- DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt)
+ quoteCharValue + "," +
- addQuotingIfNeeded(String.format("£ 123%1$s45",
decimalSeparator), quoteCharValue, true) + "\n" +
- addQuotingIfNeeded(String.format("1234%1$s5",
decimalSeparator), quoteCharValue, true) + "," + quoteCharValue +
- DateTimeFormatter.ofPattern("EEEE, MMMM dd,
yyyy").format(localDt) + quoteCharValue + "," +
- addQuotingIfNeeded(String.format("¥ 123%1$s45",
decimalSeparator), quoteCharValue, true) + "\n" +
- addQuotingIfNeeded(String.format("1%2$s234%1$s46",
decimalSeparator, groupingSeparator), quoteCharValue, true) + "," +
quoteCharValue +
- DateTimeFormatter.ofPattern("d/M/yy
HH:mm").format(localDt) + quoteCharValue + "," +
- addQuotingIfNeeded(String.format("$ 1%2$s023%1$s45",
decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" +
- addQuotingIfNeeded(String.format("1%2$s234%1$s4560",
decimalSeparator, groupingSeparator), quoteCharValue, true) + "," +
quoteCharValue +
- DateTimeFormatter.ofPattern("hh:mm a").format(localDt) +
quoteCharValue + "," +
- addQuotingIfNeeded(String.format("£ 1%2$s023%1$s45",
decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" +
- addQuotingIfNeeded(String.format("9%1$s88E+08",
decimalSeparator), quoteCharValue, true) + "," + quoteCharValue +
- DateTimeFormatter.ofPattern("yyyy/MM/dd/
HH:mm").format(localDt) + quoteCharValue + "," +
- addQuotingIfNeeded(String.format("¥ 1%2$s023%1$s45",
decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" +
- addQuotingIfNeeded(String.format("9%1$s877E+08",
decimalSeparator), quoteCharValue, true) + ",,\n" +
- addQuotingIfNeeded(String.format("9%1$s8765E+08",
decimalSeparator), quoteCharValue, true) + ",,\n").replace("E+",
getExponentSeparator(decimalFormatSymbols)));
- }
-
- @Test
- public void testCustomEscapeCharWithEL() throws Exception {
- Map<String, String> attributes = new HashMap<>();
- attributes.put("csv.escape", "^");
- testRunner.enqueue(new
File("src/test/resources/dataformatting.xlsx").toPath(), attributes);
-
- testRunner.setProperty(CSVUtils.ESCAPE_CHAR, "${csv.escape}");
- testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES,
"true");
-
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long rowsSheet =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(9, rowsSheet);
-
- LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
- DecimalFormatSymbols decimalFormatSymbols =
DecimalFormatSymbols.getInstance();
- String escapeCharValue =
testRunner.getProcessContext().getProperty(CSVUtils.ESCAPE_CHAR).evaluateAttributeExpressions(ff).getValue();
- String decimalSeparator =
String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(",")
- ? escapeCharValue + decimalFormatSymbols.getDecimalSeparator()
: String.valueOf(decimalFormatSymbols.getDecimalSeparator());
- String groupingSeparator =
String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(",")
- ? escapeCharValue +
decimalFormatSymbols.getGroupingSeparator() :
String.valueOf(decimalFormatSymbols.getGroupingSeparator());
- ff.assertContentEquals(String.format("Numbers,Timestamps,Money\n" +
- "1234%1$s456," +
DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + ",$ 123%1$s45\n" +
- "1234%1$s46," + DateTimeFormatter.ofPattern("hh:mm:ss
a").format(localDt) + ",£ 123%1$s45\n" +
- "1234%1$s5," +
DateTimeFormatter.ofPattern(String.format("EEEE%1$s, MMMM dd%1$s, yyyy",
escapeCharValue)).format(localDt) + ",¥ 123%1$s45\n" +
- "1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy
HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" +
- "1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm
a").format(localDt) + ",£ 1%2$s023%1$s45\n" +
- "9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/
HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" +
- "9%1$s877E+08,,\n" +
- "9%1$s8765E+08,,\n", decimalSeparator,
groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
- }
-
- /**
- * Validates that all sheets in the Excel document are exported.
- *
- * @throws Exception
- * Any exception thrown during execution.
- */
- @Test
- public void testProcessAllSheets() throws Exception {
-
- testRunner.enqueue(new
File("src/test/resources/CollegeScorecard.xlsx").toPath());
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long l =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(10, l);
-
- testRunner.clearProvenanceEvents();
- testRunner.clearTransferState();
-
- testRunner.enqueue(new
File("src/test/resources/TwoSheets.xlsx").toPath());
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- l =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(4, l);
-
- ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1);
- l =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(3, l);
- }
-
- /**
- * Validates that the manually specified sheet is exported from the Excel
document.
- *
- * @throws Exception
- * Any exception thrown during execution.
- */
- @Test
- public void testProcessASpecificSheetThatDoesExist() throws Exception {
-
- testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS,
"Scorecard");
- testRunner.enqueue(new
File("src/test/resources/CollegeScorecard.xlsx").toPath());
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long l =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(10, l);
- }
-
- /**
- * Tests for a syntactically valid Excel XSSF document with a manually
specified Excel sheet that does not exist.
- * In this scenario only the Original relationship should be invoked.
- *
- * @throws Exception
- * Any exception thrown during execution.
- */
- @Test
- public void testNonExistantSpecifiedSheetName() throws Exception {
-
- testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS,
"NopeIDoNotExist");
- testRunner.enqueue(new
File("src/test/resources/CollegeScorecard.xlsx").toPath());
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0);
//We aren't expecting any output to success here because the sheet doesn't
exist
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
- assertFalse(testRunner.getLogger().getWarnMessages().isEmpty());
- }
-
- /**
- * Validates that a sheet contains blank cells can be converted to a CSV
without missing columns.
- *
- * @throws Exception
- * Any exception thrown during execution.
- */
- @Test
- public void testProcessASheetWithBlankCells() throws Exception {
-
- testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS,
"Sheet1");
- testRunner.enqueue(new
File("src/test/resources/with-blank-cells.xlsx").toPath());
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
-
- MockFlowFile ff =
testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
- long l =
Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
- assertEquals(8, l);
-
- ff.assertContentEquals(new
File("src/test/resources/with-blank-cells.csv"));
- }
-
- /**
- * Tests for graceful handling and error messaging of unsupported .XLS
files.
- */
- @Test
- public void testHandleUnsupportedXlsFile() throws Exception {
-
- testRunner.enqueue(new
File("src/test/resources/Unsupported.xls").toPath());
- testRunner.run();
-
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 0);
- testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 1);
-
- List<LogMessage> errorMessages =
testRunner.getLogger().getErrorMessages();
- assertEquals(1, errorMessages.size());
- String messageText = errorMessages.get(0).getMsg();
- assertTrue(messageText.contains("Excel") &&
messageText.contains("OLE2"));
- }
-
- private String addQuotingIfNeeded(String csvField) {
- return addQuotingIfNeeded(csvField, "\"", false);
- }
-
- private String addQuotingIfNeeded(String csvField, String csvQuote,
boolean force) {
- return csvField.contains(",") || force ? String.format("%2$s%1$s%2$s",
csvField, csvQuote) : csvField;
- }
-}
\ No newline at end of file
diff --git
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx
deleted file mode 100644
index 230ad0e669..0000000000
Binary files
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx
and /dev/null differ
diff --git
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx
deleted file mode 100644
index f4977b1952..0000000000
Binary files
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx
and /dev/null differ
diff --git
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls
deleted file mode 100644
index 6023329ba9..0000000000
Binary files
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls
and /dev/null differ
diff --git
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx
deleted file mode 100644
index a9428e2bda..0000000000
Binary files
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx
and /dev/null differ
diff --git
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/logback-test.xml
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/logback-test.xml
deleted file mode 100644
index 5afbc8ea75..0000000000
---
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/logback-test.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<configuration scan="true" scanPeriod="30 seconds">
- <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
- <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
- <pattern>%-4r [%t] %-5p %c - %m%n</pattern>
- </encoder>
- </appender>
-
- <!-- valid logging levels: TRACE, DEBUG, INFO, WARN, ERROR -->
- <logger name="org.apache.nifi" level="WARN"/>
-
- <root level="INFO">
- <appender-ref ref="CONSOLE"/>
- </root>
-
-</configuration>
-
diff --git
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.csv
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.csv
deleted file mode 100644
index ff3f706b06..0000000000
---
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.csv
+++ /dev/null
@@ -1,8 +0,0 @@
-A,B,C,D
-A1,,,
-,B2,C2,
-,,C3,
-,,C4,D4
-A5,,C5,D5
-A6,B6,,D6
-A7,B7,C7,D7
diff --git
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.xlsx
b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.xlsx
deleted file mode 100644
index a9482460aa..0000000000
Binary files
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.xlsx
and /dev/null differ
diff --git a/nifi-nar-bundles/nifi-poi-bundle/pom.xml
b/nifi-nar-bundles/nifi-poi-bundle/pom.xml
index d4467f087d..8b187fd9c4 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/pom.xml
+++ b/nifi-nar-bundles/nifi-poi-bundle/pom.xml
@@ -28,7 +28,6 @@
<poi.version>5.2.3</poi.version>
</properties>
<modules>
- <module>nifi-poi-processors</module>
<module>nifi-poi-nar</module>
<module>nifi-poi-services</module>
</modules>