This is an automated email from the ASF dual-hosted git repository. arjansh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/metamodel.git
commit da011680c042cce0545cc15b4a0829ea6aa8f860 Author: Gerard Dellemann <g.dellem...@quadient.com> AuthorDate: Thu Dec 5 14:34:34 2019 +0100 MM-82 Detect Column Types - Refactoring based on comments in PR and better formatting --- excel/pom.xml | 2 +- .../excel/DefaultSpreadsheetReaderDelegate.java | 19 ++--- .../apache/metamodel/excel/ExcelConfiguration.java | 6 +- .../org/apache/metamodel/excel/ExcelUtils.java | 95 ++++++++++------------ .../metamodel/excel/ExcelConfigurationTest.java | 7 +- .../metamodel/excel/ExcelDataContextTest.java | 10 +-- 6 files changed, 59 insertions(+), 80 deletions(-) diff --git a/excel/pom.xml b/excel/pom.xml index a87f50f..e9f839b 100644 --- a/excel/pom.xml +++ b/excel/pom.xml @@ -40,7 +40,7 @@ under the License. <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> - <version>4.1.0</version> + <version>4.1.1</version> <exclusions> <exclusion> <groupId>commons-logging</groupId> diff --git a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java index 1265005..cb7595b 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java +++ b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java @@ -63,6 +63,7 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat private final Resource _resource; private final ExcelConfiguration _configuration; + private FormulaEvaluator _formulaEvaluator; public DefaultSpreadsheetReaderDelegate(Resource resource, ExcelConfiguration configuration) { _resource = resource; @@ -73,6 +74,7 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat public Schema createSchema(String schemaName) { final MutableSchema schema = new MutableSchema(schemaName); final Workbook wb = ExcelUtils.readWorkbook(_resource, true); + _formulaEvaluator = wb.getCreationHelper().createFormulaEvaluator(); try { for (int i = 0; i < wb.getNumberOfSheets(); i++) { final Sheet currentSheet = wb.getSheetAt(i); @@ -247,7 +249,8 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat return null; } - private void detectColumnTypes(final Row firstRow, final Iterator<Row> dataRowIterator, final ColumnType[] columnTypes) { + private void detectColumnTypes(final Row firstRow, final Iterator<Row> dataRowIterator, + final ColumnType[] columnTypes) { detectColumnTypesFirstRow(firstRow, columnTypes); detectColumnTypesOtherRows(dataRowIterator, columnTypes); @@ -292,7 +295,7 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat * @param cell * @return Returns a new {@link ColumnType} when detected. Otherwise null is returned. */ - private static ColumnType detectNewColumnTypeCell(final ColumnType currentColumnType, final Cell cell) { + private ColumnType detectNewColumnTypeCell(final ColumnType currentColumnType, final Cell cell) { // Can't detect something new if it's already on the default. if (currentColumnType != null && currentColumnType.equals(DEFAULT_COLUMN_TYPE)) { return null; @@ -319,7 +322,7 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat return null; } - private static ColumnType determineColumnTypeFromCell(final Cell cell) { + private ColumnType determineColumnTypeFromCell(final Cell cell) { switch (cell.getCellType()) { case NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { @@ -330,12 +333,7 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat case BOOLEAN: return ColumnType.BOOLEAN; case FORMULA: - final FormulaEvaluator evaluator = cell - .getSheet() - .getWorkbook() - .getCreationHelper() - .createFormulaEvaluator(); - return determineColumnTypeFromCell(evaluator.evaluateInCell(cell)); + return determineColumnTypeFromCell(_formulaEvaluator.evaluateInCell(cell)); case STRING: // fall through case BLANK: @@ -367,7 +365,8 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat final int offset = getColumnOffset(row); // build columns based on cell values. - try (final ColumnNamingSession columnNamingSession = _configuration.getColumnNamingStrategy() + try (final ColumnNamingSession columnNamingSession = _configuration + .getColumnNamingStrategy() .startColumnNamingSession()) { for (int i = offset; i < rowLength; i++) { final Cell cell = row.getCell(i); diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java index bf3e92d..a768f6d 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java @@ -146,11 +146,11 @@ public final class ExcelConfiguration extends BaseObject implements identifiers.add(numberOfLinesToScan); } - @Override - public String toString() { + @Override + public String toString() { return String .format("ExcelConfiguration[columnNameLineNumber=%s, skipEmptyLines=%s, skipEmptyColumns=%s, " + "detectColumnTypes=%s, numbersOfLinesToScan=%s]", columnNameLineNumber, skipEmptyLines, skipEmptyColumns, detectColumnTypes, numberOfLinesToScan); - } + } } diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java index f8bf047..6e3a66f 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java @@ -43,7 +43,6 @@ import org.apache.metamodel.util.FileHelper; import org.apache.metamodel.util.FileResource; import org.apache.metamodel.util.InMemoryResource; import org.apache.metamodel.util.Resource; -import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFFont; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.util.HSSFColor; @@ -176,8 +175,6 @@ final class ExcelUtils { return null; } - final String cellCoordinate = "(" + cell.getRowIndex() + "," + cell.getColumnIndex() + ")"; - final String result; switch (cell.getCellType()) { @@ -195,24 +192,23 @@ final class ExcelUtils { FormulaError formulaError = FormulaError.forInt(errorCode); errorResult = formulaError.getString(); } catch (RuntimeException e) { - logger.debug("Getting error code for {} failed!: {}", cellCoordinate, e.getMessage()); + logger.debug("Getting error code for {} failed!: {}", getCellCoordinates(cell), e.getMessage()); if (cell instanceof XSSFCell) { // hack to get error string, which is available String value = ((XSSFCell) cell).getErrorCellString(); errorResult = value; } else { - logger.error("Couldn't handle unexpected error scenario in cell: " + cellCoordinate, e); + logger.error("Couldn't handle unexpected error scenario in cell: " + getCellCoordinates(cell), e); throw e; } } result = errorResult; break; case FORMULA: - // result = cell.getCellFormula(); result = getFormulaCellValue(wb, cell); break; case NUMERIC: - if (HSSFDateUtil.isCellDateFormatted(cell)) { + if (DateUtil.isCellDateFormatted(cell)) { Date date = cell.getDateCellValue(); if (date == null) { result = null; @@ -220,9 +216,7 @@ final class ExcelUtils { result = DateUtils.createDateFormat().format(date); } } else { - // TODO: Consider not formatting it, but simple using - // Double.toString(...) - result = getNumericCellValueAsStringUsingBuildinFormat(cell.getCellStyle(), cell.getNumericCellValue()); + result = getNumericCellValueAsString(cell.getCellStyle(), cell.getNumericCellValue()); } break; case STRING: @@ -232,18 +226,16 @@ final class ExcelUtils { throw new IllegalStateException("Unknown cell type: " + cell.getCellType()); } - logger.debug("cell {} resolved to value: {}", cellCoordinate, result); + logger.debug("cell {} resolved to value: {}", getCellCoordinates(cell), result); return result; } - public static Object getCellValueAsObject(final Workbook wb, final Cell cell) { + private static Object getCellValueAsObject(final Workbook workbook, final Cell cell) { if (cell == null) { return null; } - final String cellCoordinate = "(" + cell.getRowIndex() + "," + cell.getColumnIndex() + ")"; - final Object result; switch (cell.getCellType()) { @@ -259,25 +251,25 @@ final class ExcelUtils { try { errorResult = FormulaError.forInt(cell.getErrorCellValue()).getString(); } catch (final RuntimeException e) { - logger.debug("Getting error code for {} failed!: {}", cellCoordinate, e.getMessage()); + logger.debug("Getting error code for {} failed!: {}", getCellCoordinates(cell), e.getMessage()); if (cell instanceof XSSFCell) { // hack to get error string, which is available errorResult = ((XSSFCell) cell).getErrorCellString(); } else { - logger.error("Couldn't handle unexpected error scenario in cell: " + cellCoordinate, e); + logger.error("Couldn't handle unexpected error scenario in cell: " + getCellCoordinates(cell), e); throw e; } } result = errorResult; break; case FORMULA: - result = getFormulaCellValueAsObject(wb, cell); + result = getFormulaCellValueAsObject(workbook, cell); break; case NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { result = cell.getDateCellValue(); } else { - result = getIntegerOrDoubleValueFromDouble(cell.getNumericCellValue()); + result = getDoubleAsNumber(cell.getNumericCellValue()); } break; case STRING: @@ -287,27 +279,26 @@ final class ExcelUtils { throw new IllegalStateException("Unknown cell type: " + cell.getCellType()); } - logger.debug("cell {} resolved to value: {}", cellCoordinate, result); + logger.debug("cell {} resolved to value: {}", getCellCoordinates(cell), result); return result; } - public static Object getCellValueChecked(final Workbook wb, final Cell cell, final ColumnType expectedColumnType) { - final Object value = getCellValueAsObject(wb, cell); + public static Object getCellValueChecked(final Workbook workbook, final Cell cell, + final ColumnType expectedColumnType) { + final Object value = getCellValueAsObject(workbook, cell); if (value == null || value.getClass().equals(expectedColumnType.getJavaEquivalentClass())) { return value; } + // Don't log when an Integer value is in a Double column type if (!(value.getClass().equals(Integer.class) && expectedColumnType .getJavaEquivalentClass() .equals(Double.class)) && logger.isWarnEnabled()) { - // Don't log when a Integer value is in a Double column type - final String cellCoordinate = "(" + cell.getRowIndex() + "," + cell.getColumnIndex() + ")"; final String warning = String - .format("Cell %s has the value '%s' of data type '%s', which doesn't match the detected column's " - + "data type '%s'. This cell gets value NULL in the DataSet.", cellCoordinate, value, value - .getClass() - .getSimpleName(), expectedColumnType); + .format("Cell %s has the value '%s' of data type '%s', which doesn't match the detected " + + "column's data type '%s'. This cell gets value NULL in the DataSet.", getCellCoordinates( + cell), value, value.getClass().getSimpleName(), expectedColumnType); logger.warn(warning); } return null; @@ -317,9 +308,7 @@ final class ExcelUtils { // first try with a cached/precalculated value try { double numericCellValue = cell.getNumericCellValue(); - // TODO: Consider not formatting it, but simple using - // Double.toString(...) - return getNumericCellValueAsStringUsingBuildinFormat(cell.getCellStyle(), numericCellValue); + return getNumericCellValueAsString(cell.getCellStyle(), numericCellValue); } catch (Exception e) { if (logger.isInfoEnabled()) { logger.info("Failed to fetch cached/precalculated formula value of cell: " + cell, e); @@ -329,8 +318,9 @@ final class ExcelUtils { // evaluate cell first, if possible try { if (logger.isInfoEnabled()) { - logger.info("cell({},{}) is a formula. Attempting to evaluate: {}", - new Object[] { cell.getRowIndex(), cell.getColumnIndex(), cell.getCellFormula() }); + logger + .info("cell {} is a formula. Attempting to evaluate: {}", getCellCoordinates(cell), cell + .getCellFormula()); } final FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator(); @@ -340,8 +330,9 @@ final class ExcelUtils { return getCellValue(wb, evaluatedCell); } catch (RuntimeException e) { - logger.warn("Exception occurred while evaluating formula at position ({},{}): {}", - new Object[] { cell.getRowIndex(), cell.getColumnIndex(), e.getMessage() }); + logger + .warn("Exception occurred while evaluating formula at position {}: {}", getCellCoordinates(cell), e + .getMessage()); // Some exceptions we simply log - result will be then be the // actual formula if (e instanceof FormulaParseException) { @@ -357,10 +348,10 @@ final class ExcelUtils { return cell.getCellFormula(); } - private static Object getFormulaCellValueAsObject(final Workbook wb, final Cell cell) { + private static Object getFormulaCellValueAsObject(final Workbook workbook, final Cell cell) { // first try with a cached/precalculated value try { - return getIntegerOrDoubleValueFromDouble(cell.getNumericCellValue()); + return getDoubleAsNumber(cell.getNumericCellValue()); } catch (final Exception e) { if (logger.isInfoEnabled()) { logger.info("Failed to fetch cached/precalculated formula value of cell: " + cell, e); @@ -371,35 +362,29 @@ final class ExcelUtils { try { if (logger.isInfoEnabled()) { logger - .info("cell({},{}) is a formula. Attempting to evaluate: {}", cell.getRowIndex(), cell - .getColumnIndex(), cell.getCellFormula()); + .info("cell {} is a formula. Attempting to evaluate: {}", getCellCoordinates(cell), cell + .getCellFormula()); } - final FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator(); + final FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator(); // calculates the formula and puts it's value back into the cell final Cell evaluatedCell = evaluator.evaluateInCell(cell); - return getCellValueAsObject(wb, evaluatedCell); + return getCellValueAsObject(workbook, evaluatedCell); + } catch (final FormulaParseException e) { + logger.error("Parse exception occurred while evaluating cell formula: " + cell, e); + } catch (final IllegalArgumentException e) { + logger.error("Illegal formula argument occurred while evaluating cell formula: " + cell, e); } catch (final RuntimeException e) { - logger - .warn("Exception occurred while evaluating formula at position ({},{}): {}", cell.getRowIndex(), - cell.getColumnIndex(), e.getMessage()); - // Some exceptions we simply log - result will be then be the actual formula - if (e instanceof FormulaParseException) { - logger.error("Parse exception occurred while evaluating cell formula: " + cell, e); - } else if (e instanceof IllegalArgumentException) { - logger.error("Illegal formula argument occurred while evaluating cell formula: " + cell, e); - } else { - logger.error("Unexpected exception occurred while evaluating cell formula: " + cell, e); - } + logger.error("Unexpected exception occurred while evaluating cell formula: " + cell, e); } // last resort: return the string formula return cell.getCellFormula(); } - private static Number getIntegerOrDoubleValueFromDouble(final double value) { + private static Number getDoubleAsNumber(final double value) { final Double doubleValue = Double.valueOf(value); if (doubleValue % 1 == 0 && doubleValue <= Integer.MAX_VALUE) { return Integer.valueOf(doubleValue.intValue()); @@ -579,7 +564,7 @@ final class ExcelUtils { return dataSet; } - private static String getNumericCellValueAsStringUsingBuildinFormat(final CellStyle cellStyle, final double cellValue) { + private static String getNumericCellValueAsString(final CellStyle cellStyle, final double cellValue) { final int formatIndex = cellStyle.getDataFormat(); String formatString = cellStyle.getDataFormatString(); if (formatString == null) { @@ -588,4 +573,8 @@ final class ExcelUtils { final DataFormatter formatter = new DataFormatter(); return formatter.formatRawCellContents(cellValue, formatIndex, formatString); } + + private static String getCellCoordinates(Cell cell) { + return String.format("(%s,%s)", cell.getRowIndex(), cell.getColumnIndex()); + } } diff --git a/excel/src/test/java/org/apache/metamodel/excel/ExcelConfigurationTest.java b/excel/src/test/java/org/apache/metamodel/excel/ExcelConfigurationTest.java index a652e72..f54980f 100644 --- a/excel/src/test/java/org/apache/metamodel/excel/ExcelConfigurationTest.java +++ b/excel/src/test/java/org/apache/metamodel/excel/ExcelConfigurationTest.java @@ -22,14 +22,13 @@ import junit.framework.TestCase; public class ExcelConfigurationTest extends TestCase { - public void testToString() throws Exception { + public void testToString() throws Exception { final ExcelConfiguration conf = new ExcelConfiguration(1, true, false); assertEquals(String .format("ExcelConfiguration[columnNameLineNumber=%s, skipEmptyLines=%s, skipEmptyColumns=%s, " + "detectColumnTypes=%s, numbersOfLinesToScan=%s]", ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, - true, false, false, ExcelConfiguration.DEFAULT_NUMBERS_OF_LINES_TO_SCAN), - conf.toString()); - } + true, false, false, ExcelConfiguration.DEFAULT_NUMBERS_OF_LINES_TO_SCAN), conf.toString()); + } public void testEquals() throws Exception { ExcelConfiguration conf1 = new ExcelConfiguration(1, true, false); diff --git a/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java b/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java index 3d05c83..7fdfff9 100644 --- a/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java +++ b/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java @@ -910,18 +910,11 @@ public class ExcelDataContextTest extends TestCase { final Column columnD = table.getColumns().get(3); assertEquals("d", columnD.getName()); assertEquals(ColumnType.INTEGER, columnD.getType()); - final DataSet dataSetColumnD = dataContext - .query() - .from(table) - .select(columnD) - .where(columnD) - .eq(12) - .execute(); + final DataSet dataSetColumnD = dataContext.query().from(table).select(columnD).where(columnD).eq(12).execute(); assertTrue(dataSetColumnD.next()); assertEquals(12, dataSetColumnD.getRow().getValue(0)); } - public void testInsertingValueOfValidColumnType() { final ExcelDataContext dataContext = new ExcelDataContext(copyOf("src/test/resources/different_datatypes.xls"), new ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, false, true, 19)); @@ -955,5 +948,4 @@ public class ExcelDataContextTest extends TestCase { final Table table = dataContext.getDefaultSchema().getTable(0); dataContext.executeUpdate(new Update(table).value("INTEGER", 1).value("INTEGER", "this is not an integer")); } - } \ No newline at end of file