This is an automated email from the ASF dual-hosted git repository.

arjansh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/metamodel.git

commit a58159ab60c113745036075de117bd9044b02bea
Author: Gerard Dellemann <g.dellem...@quadient.com>
AuthorDate: Wed Dec 4 17:19:54 2019 +0100

    MM-82 Detect Column Types
---
 excel/pom.xml                                      |   2 +-
 .../excel/DefaultSpreadsheetReaderDelegate.java    | 214 +++++++++++++++++++--
 .../apache/metamodel/excel/ExcelConfiguration.java |  47 ++++-
 .../apache/metamodel/excel/ExcelDataContext.java   |   2 +-
 .../org/apache/metamodel/excel/ExcelUtils.java     | 160 ++++++++++++++-
 .../metamodel/excel/ExcelConfigurationTest.java    |  10 +-
 .../metamodel/excel/ExcelDataContextTest.java      | 164 ++++++++++++++++
 excel/src/test/resources/different_datatypes.xls   | Bin 0 -> 35840 bytes
 excel/src/test/resources/different_datatypes.xlsx  | Bin 0 -> 9870 bytes
 9 files changed, 558 insertions(+), 41 deletions(-)

diff --git a/excel/pom.xml b/excel/pom.xml
index 7f8e8f0..a87f50f 100644
--- a/excel/pom.xml
+++ b/excel/pom.xml
@@ -40,7 +40,7 @@ under the License.
                <dependency>
                        <groupId>org.apache.poi</groupId>
                        <artifactId>poi-ooxml</artifactId>
-                       <version>4.0.1</version>
+                       <version>4.1.0</version>
                        <exclusions>
                                <exclusion>
                                        <groupId>commons-logging</groupId>
diff --git 
a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
 
b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
index 97ba50f..41e2b75 100644
--- 
a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
+++ 
b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
@@ -18,6 +18,7 @@
  */
 package org.apache.metamodel.excel;
 
+import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 import java.util.stream.Collectors;
@@ -41,6 +42,8 @@ import 
org.apache.metamodel.schema.naming.ColumnNamingStrategy;
 import org.apache.metamodel.util.FileHelper;
 import org.apache.metamodel.util.Resource;
 import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.DateUtil;
+import org.apache.poi.ss.usermodel.FormulaEvaluator;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
 import org.apache.poi.ss.usermodel.Workbook;
@@ -53,6 +56,9 @@ import org.slf4j.LoggerFactory;
  */
 final class DefaultSpreadsheetReaderDelegate implements 
SpreadsheetReaderDelegate {
 
+    static final ColumnType DEFAULT_COLUMN_TYPE = ColumnType.STRING;
+    static final ColumnType LEGACY_COLUMN_TYPE = ColumnType.VARCHAR;
+
     private static final Logger logger = 
LoggerFactory.getLogger(DefaultSpreadsheetReaderDelegate.class);
 
     private final Resource _resource;
@@ -128,6 +134,11 @@ final class DefaultSpreadsheetReaderDelegate implements 
SpreadsheetReaderDelegat
             row = rowIterator.next();
         }
 
+        final ColumnType[] columnTypes = getColumnTypes(sheet);
+        if (columnTypes == null) {
+            return table;
+        }
+
         final int columnNameLineNumber = 
_configuration.getColumnNameLineNumber();
         if (columnNameLineNumber == ExcelConfiguration.NO_COLUMN_NAME_LINE) {
 
@@ -146,34 +157,196 @@ final class DefaultSpreadsheetReaderDelegate implements 
SpreadsheetReaderDelegat
                     columnNamingSession.getNextColumnName(new 
ColumnNamingContextImpl(i));
                 }
 
-                for (int j = offset; j < row.getLastCellNum(); j++) {
-                    final ColumnNamingContext namingContext = new 
ColumnNamingContextImpl(table, null, j);
+                for (int i = offset; i < row.getLastCellNum(); i++) {
+                    final ColumnNamingContext namingContext = new 
ColumnNamingContextImpl(table, null, i);
                     final Column column = new 
MutableColumn(columnNamingSession.getNextColumnName(namingContext),
-                            ColumnType.STRING, table, j, true);
+                            columnTypes[i], table, i, true);
                     table.addColumn(column);
                 }
             }
 
         } else {
+            row = iterateToColumnNameRow(rowIterator, row);
 
-            boolean hasColumns = true;
+            if (row != null) {
+                createColumns(table, wb, row, columnTypes);
+            }
+        }
 
-            // iterate to the column name line number (if above 1)
-            for (int j = 1; j < columnNameLineNumber; j++) {
-                if (rowIterator.hasNext()) {
-                    row = rowIterator.next();
-                } else {
-                    hasColumns = false;
-                    break;
+        return table;
+    }
+
+    /**
+     * Iterate to the column name row if the configured ColumnNameLineNumber 
is above 1.
+     * @param rowIterator
+     * @param currentRow
+     * @return Returns the column name row. Returns the current row if the 
configured ColumnNameLineNumber is 1.
+     * Returns null if the columnName row is not found.
+     */
+    private Row iterateToColumnNameRow(final Iterator<Row> rowIterator, final 
Row currentRow) {
+        Row row = currentRow;
+
+        // iterate to the column name line number (if above 1)
+        for (int i = 1; i < _configuration.getColumnNameLineNumber(); i++) {
+            if (rowIterator.hasNext()) {
+                row = rowIterator.next();
+            } else {
+                return null;
+            }
+        }
+
+        return row;
+    }
+
+    /**
+     * Get an array of {@link ColumnType}s. The length of the array is 
determined by the header row. If there's no
+     * configured column name line, then the first data row is used. If the 
{@link ColumnType} should be detected, then
+     * this is done by using the data rows only. If this shouldn't be 
detected, then the array is filled with either
+     * default column type when there is no column name line or legacy column 
type when there is a column name line.
+     * @param sheet
+     * @return
+     */
+    private ColumnType[] getColumnTypes(final Sheet sheet) {
+        // To find the array length we need the header
+        final Iterator<Row> iterator = ExcelUtils.getRowIterator(sheet, 
_configuration, false);
+        Row row;
+        if (_configuration.getColumnNameLineNumber() == 
ExcelConfiguration.NO_COLUMN_NAME_LINE) {
+            row = findTheFirstNonEmptyRow(iterator);
+        } else {
+            row = iterateToColumnNameRow(iterator, iterator.next());
+        }
+        if (row == null) {
+            return null;
+        }
+
+        final ColumnType[] columnTypes = new ColumnType[row.getLastCellNum()];
+
+        if (_configuration.isDetectColumnTypes()) {
+            // Now we need the first data row
+            row = findTheFirstNonEmptyRow(iterator);
+            if (row != null) {
+                detectColumnTypes(row, iterator, columnTypes);
+            }
+        } else {
+            if (_configuration.getColumnNameLineNumber() == 
ExcelConfiguration.NO_COLUMN_NAME_LINE) {
+                Arrays.fill(columnTypes, DEFAULT_COLUMN_TYPE);
+            } else {
+                Arrays.fill(columnTypes, LEGACY_COLUMN_TYPE);
+            }
+        }
+        return columnTypes;
+    }
+
+    private static Row findTheFirstNonEmptyRow(final Iterator<Row> 
rowIterator) {
+        while (rowIterator.hasNext()) {
+            final Row row = rowIterator.next();
+            if (row != null) {
+                return row;
+            }
+        }
+        return null;
+    }
+
+    private void detectColumnTypes(Row firstRow, final Iterator<Row> 
dataRowIterator, final ColumnType[] columnTypes) {
+        detectColumnTypesFirstRow(firstRow, columnTypes);
+        detectColumnTypesOtherRows(dataRowIterator, columnTypes);
+
+        // If all cells are null, then this loop sets the column type to the 
default
+        for (int i = 0; i < columnTypes.length; i++) {
+            if (columnTypes[i] == null) {
+                columnTypes[i] = DEFAULT_COLUMN_TYPE;
+            }
+        }
+    }
+
+    private void detectColumnTypesFirstRow(final Row firstRow, final 
ColumnType[] columnTypes) {
+        if (firstRow != null && firstRow.getLastCellNum() > 0) {
+            for (int i = getColumnOffset(firstRow); i < columnTypes.length; 
i++) {
+                if (firstRow.getCell(i) != null) {
+                    columnTypes[i] = 
determineColumnTypeFromCell(firstRow.getCell(i));
+                }
+            }
+        }
+    }
+
+    private void detectColumnTypesOtherRows(final Iterator<Row> 
dataRowIterator, final ColumnType[] columnTypes) {
+        int numberOfLinesToScan = _configuration.getNumberOfLinesToScan() - 1;
+
+        while (dataRowIterator.hasNext() && numberOfLinesToScan-- > 0) {
+            final Row currentRow = dataRowIterator.next();
+            if (currentRow != null && currentRow.getLastCellNum() > 0) {
+                for (int i = getColumnOffset(currentRow); i < 
columnTypes.length; i++) {
+                    final ColumnType detectNewColumnType = 
detectNewColumnTypeCell(columnTypes[i], currentRow
+                            .getCell(i));
+                    if (detectNewColumnType != null) {
+                        columnTypes[i] = detectNewColumnType;
+                    }
                 }
             }
+        }
+    }
 
-            if (hasColumns) {
-                createColumns(table, wb, row);
+    /**
+     * Tries to detect a new {@link ColumnType} for a cell.
+     * @param currentColumnType
+     * @param cell
+     * @return Returns a new {@link ColumnType} when detected. Otherwise null 
is returned.
+     */
+    private static ColumnType detectNewColumnTypeCell(final ColumnType 
currentColumnType, final Cell cell) {
+        // Can't detect something new if it's already on the default.
+        if (currentColumnType != null && 
currentColumnType.equals(DEFAULT_COLUMN_TYPE)) {
+            return null;
+        }
+        // Skip if the cell is null. This way 1 missing cell can't influence 
the column type of all other cells.
+        if (cell == null) {
+            return null;
+        }
+
+        final ColumnType detectedColumnType = 
determineColumnTypeFromCell(cell);
+        if (currentColumnType == null) {
+            return detectedColumnType;
+        } else if (!currentColumnType.equals(detectedColumnType)) {
+            // If the column type is Double and a Integer is detected, then 
don't set it to Integer
+            if (currentColumnType.equals(ColumnType.INTEGER) && 
detectedColumnType.equals(ColumnType.DOUBLE)) {
+                // If the column type is Integer and a Double is detected, 
then set it to Double
+                return detectedColumnType;
+            } else if (currentColumnType.equals(ColumnType.DOUBLE) && 
detectedColumnType.equals(ColumnType.INTEGER)) {
+                return null;
+            } else {
+                return DEFAULT_COLUMN_TYPE;
             }
         }
+        return null;
+    }
 
-        return table;
+    private static ColumnType determineColumnTypeFromCell(final Cell cell) {
+        switch (cell.getCellType()) {
+            case NUMERIC:
+                if (DateUtil.isCellDateFormatted(cell)) {
+                    return ColumnType.DATE;
+                } else {
+                    return cell.getNumericCellValue() % 1 == 0 ? 
ColumnType.INTEGER : ColumnType.DOUBLE;
+                }
+            case BOOLEAN:
+                return ColumnType.BOOLEAN;
+            case FORMULA:
+                final FormulaEvaluator evaluator = cell
+                        .getSheet()
+                        .getWorkbook()
+                        .getCreationHelper()
+                        .createFormulaEvaluator();
+                return 
determineColumnTypeFromCell(evaluator.evaluateInCell(cell));
+            case STRING:
+                // fall through
+            case BLANK:
+                // fall through
+            case _NONE:
+                // fall through
+            case ERROR:
+                // fall through
+            default:
+                return DEFAULT_COLUMN_TYPE;
+            }
     }
 
     /**
@@ -183,7 +356,8 @@ final class DefaultSpreadsheetReaderDelegate implements 
SpreadsheetReaderDelegat
      * @param wb
      * @param row
      */
-    private void createColumns(MutableTable table, Workbook wb, Row row) {
+    private void createColumns(final MutableTable table, final Workbook wb, 
final Row row,
+            final ColumnType[] columnTypes) {
         if (row == null) {
             logger.warn("Cannot create columns based on null row!");
             return;
@@ -195,13 +369,13 @@ final class DefaultSpreadsheetReaderDelegate implements 
SpreadsheetReaderDelegat
         // build columns based on cell values.
         try (final ColumnNamingSession columnNamingSession = 
_configuration.getColumnNamingStrategy()
                 .startColumnNamingSession()) {
-            for (int j = offset; j < rowLength; j++) {
-                final Cell cell = row.getCell(j);
-                final String intrinsicColumnName = ExcelUtils.getCellValue(wb, 
cell);
+            for (int i = offset; i < rowLength; i++) {
+                final Cell cell = row.getCell(i);
+                final String intrinsicColumnName = (String) 
ExcelUtils.getCellValue(wb, cell);
                 final ColumnNamingContext columnNamingContext = new 
ColumnNamingContextImpl(table, intrinsicColumnName,
-                        j);
+                        i);
                 final String columnName = 
columnNamingSession.getNextColumnName(columnNamingContext);
-                final Column column = new MutableColumn(columnName, 
ColumnType.VARCHAR, table, j, true);
+                final Column column = new MutableColumn(columnName, 
columnTypes[i], table, i, true);
                 table.addColumn(column);
             }
         }
diff --git 
a/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java 
b/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java
index 4779bb1..52d2ad5 100644
--- a/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java
+++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java
@@ -37,26 +37,39 @@ public final class ExcelConfiguration extends BaseObject 
implements
 
        public static final int NO_COLUMN_NAME_LINE = 0;
        public static final int DEFAULT_COLUMN_NAME_LINE = 1;
+    public static final int DEFAULT_NUMBERS_OF_LINES_TO_SCAN = 10;
 
        private final int columnNameLineNumber;
        private final ColumnNamingStrategy columnNamingStrategy;
        private final boolean skipEmptyLines;
        private final boolean skipEmptyColumns;
+    private final boolean detectColumnTypes;
+    private final int numberOfLinesToScan;
 
        public ExcelConfiguration() {
                this(DEFAULT_COLUMN_NAME_LINE, true, false);
        }
 
-    public ExcelConfiguration(int columnNameLineNumber, boolean 
skipEmptyLines, boolean skipEmptyColumns) {
+    public ExcelConfiguration(final int columnNameLineNumber, final boolean 
skipEmptyLines,
+            final boolean skipEmptyColumns) {
         this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns);
     }
 
-    public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy 
columnNamingStrategy,
-            boolean skipEmptyLines, boolean skipEmptyColumns) {
+    public ExcelConfiguration(final int columnNameLineNumber, final 
ColumnNamingStrategy columnNamingStrategy,
+            final boolean skipEmptyLines, final boolean skipEmptyColumns) {
+        this(columnNameLineNumber, columnNamingStrategy, skipEmptyLines, 
skipEmptyColumns, false,
+                DEFAULT_NUMBERS_OF_LINES_TO_SCAN);
+    }
+
+    public ExcelConfiguration(final int columnNameLineNumber, final 
ColumnNamingStrategy columnNamingStrategy,
+            final boolean skipEmptyLines, final boolean skipEmptyColumns, 
final boolean detectColumnTypes,
+            final int numberOfLinesToScan) {
         this.columnNameLineNumber = columnNameLineNumber;
         this.skipEmptyLines = skipEmptyLines;
         this.skipEmptyColumns = skipEmptyColumns;
         this.columnNamingStrategy = columnNamingStrategy;
+        this.detectColumnTypes = detectColumnTypes;
+        this.numberOfLinesToScan = numberOfLinesToScan;
     }
     
     /**
@@ -102,17 +115,39 @@ public final class ExcelConfiguration extends BaseObject 
implements
                return skipEmptyColumns;
        }
 
+    /**
+     * Defines if columns in the excel spreadsheet should be validated on 
datatypes while
+     * reading the spreadsheet.
+     * 
+     * @return a boolean indicating whether or not to validate column types.
+     */
+    public boolean isDetectColumnTypes() {
+        return detectColumnTypes;
+    }
+
+    /**
+     * The number of lines to scan when detecting the column types
+     * 
+     * @return an int indicating the numbers of lines that will be scanned
+     */
+    public int getNumberOfLinesToScan() {
+        return numberOfLinesToScan;
+    }
+
        @Override
        protected void decorateIdentity(List<Object> identifiers) {
                identifiers.add(columnNameLineNumber);
                identifiers.add(skipEmptyLines);
                identifiers.add(skipEmptyColumns);
+        identifiers.add(detectColumnTypes);
+        identifiers.add(numberOfLinesToScan);
        }
 
        @Override
        public String toString() {
-               return "ExcelConfiguration[columnNameLineNumber="
-                               + columnNameLineNumber + ", skipEmptyLines=" + 
skipEmptyLines
-                               + ", skipEmptyColumns=" + skipEmptyColumns + 
"]";
+        return String
+                .format("ExcelConfiguration[columnNameLineNumber=%s, 
skipEmptyLines=%s, skipEmptyColumns=%s, "
+                        + "detectColumnTypes=%s, numbersOfLinesToScan=%s]", 
columnNameLineNumber, skipEmptyLines,
+                        skipEmptyColumns, detectColumnTypes, 
numberOfLinesToScan);
        }
 }
diff --git 
a/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java 
b/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java
index b1f8149..0829109 100644
--- a/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java
+++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java
@@ -189,7 +189,7 @@ public final class ExcelDataContext extends 
QueryPostprocessDataContext implemen
                 if (_spreadsheetReaderDelegate == null) {
                     _spreadsheetReaderDelegate = _resource.read(in -> {
                         try {
-                            if (FileMagic.valueOf(in) == FileMagic.OOXML) {
+                            if (FileMagic.valueOf(in) == FileMagic.OOXML && 
!_configuration.isDetectColumnTypes()) {
                                 return new 
XlsxSpreadsheetReaderDelegate(_resource, _configuration);
                             } else {
                                 return new 
DefaultSpreadsheetReaderDelegate(_resource, _configuration);
diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java 
b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
index 2da6ef3..f05e86c 100644
--- a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
+++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
@@ -19,7 +19,6 @@
 package org.apache.metamodel.excel;
 
 import java.io.File;
-import java.text.NumberFormat;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
@@ -37,11 +36,11 @@ import org.apache.metamodel.data.Style;
 import org.apache.metamodel.data.Style.SizeUnit;
 import org.apache.metamodel.data.StyleBuilder;
 import org.apache.metamodel.query.SelectItem;
+import org.apache.metamodel.schema.ColumnType;
 import org.apache.metamodel.schema.Table;
 import org.apache.metamodel.util.DateUtils;
 import org.apache.metamodel.util.FileHelper;
 import org.apache.metamodel.util.FileResource;
-import org.apache.metamodel.util.FormatHelper;
 import org.apache.metamodel.util.InMemoryResource;
 import org.apache.metamodel.util.Resource;
 import org.apache.poi.hssf.usermodel.HSSFDateUtil;
@@ -49,9 +48,11 @@ import org.apache.poi.hssf.usermodel.HSSFFont;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.hssf.util.HSSFColor;
 import org.apache.poi.ss.formula.FormulaParseException;
+import org.apache.poi.ss.usermodel.BuiltinFormats;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.CellStyle;
 import org.apache.poi.ss.usermodel.Color;
+import org.apache.poi.ss.usermodel.DataFormatter;
 import org.apache.poi.ss.usermodel.FillPatternType;
 import org.apache.poi.ss.usermodel.Font;
 import org.apache.poi.ss.usermodel.FontUnderline;
@@ -76,8 +77,6 @@ final class ExcelUtils {
 
     private static final Logger logger = 
LoggerFactory.getLogger(ExcelUtils.class);
 
-    private static final NumberFormat _numberFormat = 
FormatHelper.getUiNumberFormat();
-
     private ExcelUtils() {
         // prevent instantiation
     }
@@ -222,7 +221,62 @@ final class ExcelUtils {
             } else {
                 // TODO: Consider not formatting it, but simple using
                 // Double.toString(...)
-                result = _numberFormat.format(cell.getNumericCellValue());
+                result = 
getNumericCellValueAsStringUsingBuildinFormat(cell.getCellStyle(), 
cell.getNumericCellValue());
+            }
+            break;
+        case STRING:
+            result = cell.getRichStringCellValue().getString();
+            break;
+        default:
+            throw new IllegalStateException("Unknown cell type: " + 
cell.getCellType());
+        }
+
+        logger.debug("cell {} resolved to value: {}", cellCoordinate, result);
+
+        return result;
+    }
+
+    public static Object getCellValueAsObject(Workbook wb, Cell cell) {
+        if (cell == null) {
+            return null;
+        }
+
+        final String cellCoordinate = "(" + cell.getRowIndex() + "," + 
cell.getColumnIndex() + ")";
+
+        final Object result;
+
+        switch (cell.getCellType()) {
+        case BLANK:
+        case _NONE:
+            result = null;
+            break;
+        case BOOLEAN:
+            result = Boolean.valueOf(cell.getBooleanCellValue());
+            break;
+        case ERROR:
+            String errorResult;
+            try {
+                errorResult = 
FormulaError.forInt(cell.getErrorCellValue()).getString();
+            } catch (RuntimeException e) {
+                logger.debug("Getting error code for {} failed!: {}", 
cellCoordinate, e.getMessage());
+                if (cell instanceof XSSFCell) {
+                    // hack to get error string, which is available
+                    errorResult = ((XSSFCell) cell).getErrorCellString();
+                } else {
+                    logger.error("Couldn't handle unexpected error scenario in 
cell: " + cellCoordinate, e);
+                    throw e;
+                }
+            }
+            result = errorResult;
+            break;
+        case FORMULA:
+            result = getFormulaCellValueAsObject(wb, cell);
+            break;
+        case NUMERIC:
+            if (HSSFDateUtil.isCellDateFormatted(cell)) {
+                result = cell.getDateCellValue();
+            } else {
+                result = 
getIntegerOrDoubleValueFromDouble(cell.getNumericCellValue());
             }
             break;
         case STRING:
@@ -237,13 +291,34 @@ final class ExcelUtils {
         return result;
     }
 
+    public static Object getCellValueChecked(final Workbook wb, final Cell 
cell, final ColumnType expectedColumnType) {
+        final Object value = getCellValueAsObject(wb, cell);
+        if (value == null || 
value.getClass().equals(expectedColumnType.getJavaEquivalentClass())) {
+            return value;
+        }
+        
+        if (!(value.getClass().equals(Integer.class) && expectedColumnType
+                .getJavaEquivalentClass()
+                .equals(Double.class)) && logger.isWarnEnabled()) {
+            // Don't log when a Integer value is in a Double column type
+            final String cellCoordinate = "(" + cell.getRowIndex() + "," + 
cell.getColumnIndex() + ")";
+            final String warning = String
+                    .format("Cell %s has the value '%s' of data type '%s', 
which doesn't match the detected column's "
+                            + "data type '%s'. This cell gets value NULL in 
the DataSet.", cellCoordinate, value, value
+                                    .getClass()
+                                    .getSimpleName(), expectedColumnType);
+            logger.warn(warning);
+        }
+        return null;
+    }
+
     private static String getFormulaCellValue(Workbook wb, Cell cell) {
         // first try with a cached/precalculated value
         try {
             double numericCellValue = cell.getNumericCellValue();
             // TODO: Consider not formatting it, but simple using
             // Double.toString(...)
-            return _numberFormat.format(numericCellValue);
+            return 
getNumericCellValueAsStringUsingBuildinFormat(cell.getCellStyle(), 
numericCellValue);
         } catch (Exception e) {
             if (logger.isInfoEnabled()) {
                 logger.info("Failed to fetch cached/precalculated formula 
value of cell: " + cell, e);
@@ -281,6 +356,57 @@ final class ExcelUtils {
         return cell.getCellFormula();
     }
 
+    private static Object getFormulaCellValueAsObject(final Workbook wb, final 
Cell cell) {
+        // first try with a cached/precalculated value
+        try {
+            return 
getIntegerOrDoubleValueFromDouble(cell.getNumericCellValue());
+        } catch (Exception e) {
+            if (logger.isInfoEnabled()) {
+                logger.info("Failed to fetch cached/precalculated formula 
value of cell: " + cell, e);
+            }
+        }
+
+        // evaluate cell first, if possible
+        try {
+            if (logger.isInfoEnabled()) {
+                logger
+                        .info("cell({},{}) is a formula. Attempting to 
evaluate: {}", cell.getRowIndex(), cell
+                                .getColumnIndex(), cell.getCellFormula());
+            }
+
+            final FormulaEvaluator evaluator = 
wb.getCreationHelper().createFormulaEvaluator();
+
+            // calculates the formula and puts it's value back into the cell
+            final Cell evaluatedCell = evaluator.evaluateInCell(cell);
+
+            return getCellValueAsObject(wb, evaluatedCell);
+        } catch (RuntimeException e) {
+            logger
+                    .warn("Exception occurred while evaluating formula at 
position ({},{}): {}", cell.getRowIndex(),
+                            cell.getColumnIndex(), e.getMessage());
+            // Some exceptions we simply log - result will be then be the 
actual formula
+            if (e instanceof FormulaParseException) {
+                logger.error("Parse exception occurred while evaluating cell 
formula: " + cell, e);
+            } else if (e instanceof IllegalArgumentException) {
+                logger.error("Illegal formula argument occurred while 
evaluating cell formula: " + cell, e);
+            } else {
+                logger.error("Unexpected exception occurred while evaluating 
cell formula: " + cell, e);
+            }
+        }
+
+        // last resort: return the string formula
+        return cell.getCellFormula();
+    }
+
+    private static Number getIntegerOrDoubleValueFromDouble(double value) {
+        final Double doubleValue = Double.valueOf(value);
+        if (doubleValue % 1 == 0 && doubleValue <= Integer.MAX_VALUE) {
+            return Integer.valueOf(doubleValue.intValue());
+        } else {
+            return doubleValue;
+        }
+    }
+
     public static Style getCellStyle(Workbook workbook, Cell cell) {
         if (cell == null) {
             return Style.NO_STYLE;
@@ -414,13 +540,21 @@ final class ExcelUtils {
      */
     public static DefaultRow createRow(Workbook workbook, Row row, 
DataSetHeader header) {
         final int size = header.size();
-        final String[] values = new String[size];
+        final Object[] values = new Object[size];
         final Style[] styles = new Style[size];
         if (row != null) {
             for (int i = 0; i < size; i++) {
                 final int columnNumber = 
header.getSelectItem(i).getColumn().getColumnNumber();
+                final ColumnType columnType = 
header.getSelectItem(i).getColumn().getType();
                 final Cell cell = row.getCell(columnNumber);
-                final String value = ExcelUtils.getCellValue(workbook, cell);
+                final Object value;
+                if 
(columnType.equals(DefaultSpreadsheetReaderDelegate.DEFAULT_COLUMN_TYPE) || 
columnType
+                        
.equals(DefaultSpreadsheetReaderDelegate.LEGACY_COLUMN_TYPE)) {
+                    value = ExcelUtils.getCellValue(workbook, cell);
+                } else {
+                    value = ExcelUtils.getCellValueChecked(workbook, cell, 
columnType);
+                }
+
                 final Style style = ExcelUtils.getCellStyle(workbook, cell);
                 values[i] = value;
                 styles[i] = style;
@@ -443,4 +577,14 @@ final class ExcelUtils {
         final DataSet dataSet = new XlsDataSet(selectItems, workbook, 
rowIterator);
         return dataSet;
     }
+
+    private static String 
getNumericCellValueAsStringUsingBuildinFormat(CellStyle cellStyle, double 
cellValue) {
+        int formatIndex = cellStyle.getDataFormat();
+        String formatString = cellStyle.getDataFormatString();
+        if (formatString == null) {
+            formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
+        }
+        final DataFormatter formatter = new DataFormatter();
+        return formatter.formatRawCellContents(cellValue, formatIndex, 
formatString);
+    }
 }
diff --git 
a/excel/src/test/java/org/apache/metamodel/excel/ExcelConfigurationTest.java 
b/excel/src/test/java/org/apache/metamodel/excel/ExcelConfigurationTest.java
index 6e7559c..a652e72 100644
--- a/excel/src/test/java/org/apache/metamodel/excel/ExcelConfigurationTest.java
+++ b/excel/src/test/java/org/apache/metamodel/excel/ExcelConfigurationTest.java
@@ -18,16 +18,16 @@
  */
 package org.apache.metamodel.excel;
 
-import org.apache.metamodel.excel.ExcelConfiguration;
-
 import junit.framework.TestCase;
 
 public class ExcelConfigurationTest extends TestCase {
 
        public void testToString() throws Exception {
-               ExcelConfiguration conf = new ExcelConfiguration(1, true, 
false);
-               assertEquals(
-                               "ExcelConfiguration[columnNameLineNumber=1, 
skipEmptyLines=true, skipEmptyColumns=false]",
+        final ExcelConfiguration conf = new ExcelConfiguration(1, true, false);
+        assertEquals(String
+                .format("ExcelConfiguration[columnNameLineNumber=%s, 
skipEmptyLines=%s, skipEmptyColumns=%s, "
+                        + "detectColumnTypes=%s, numbersOfLinesToScan=%s]", 
ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE,
+                        true, false, false, 
ExcelConfiguration.DEFAULT_NUMBERS_OF_LINES_TO_SCAN),
                                conf.toString());
        }
 
diff --git 
a/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java 
b/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java
index 13cfdec..3d05c83 100644
--- a/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java
+++ b/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java
@@ -21,8 +21,10 @@ package org.apache.metamodel.excel;
 import java.io.File;
 import java.util.Arrays;
 import java.util.List;
+import java.util.stream.IntStream;
 
 import org.apache.metamodel.DataContext;
+import org.apache.metamodel.MetaModelException;
 import org.apache.metamodel.MetaModelHelper;
 import org.apache.metamodel.UpdateCallback;
 import org.apache.metamodel.UpdateScript;
@@ -30,15 +32,19 @@ import org.apache.metamodel.data.DataSet;
 import org.apache.metamodel.data.Row;
 import org.apache.metamodel.data.Style;
 import org.apache.metamodel.data.StyleBuilder;
+import org.apache.metamodel.insert.InsertInto;
 import org.apache.metamodel.query.Query;
 import org.apache.metamodel.schema.Column;
+import org.apache.metamodel.schema.ColumnType;
 import org.apache.metamodel.schema.Schema;
 import org.apache.metamodel.schema.Table;
 import org.apache.metamodel.schema.naming.CustomColumnNamingStrategy;
+import org.apache.metamodel.update.Update;
 import org.apache.metamodel.util.DateUtils;
 import org.apache.metamodel.util.FileHelper;
 import org.apache.metamodel.util.FileResource;
 import org.apache.metamodel.util.Month;
+import org.junit.Test;
 
 import junit.framework.TestCase;
 
@@ -792,4 +798,162 @@ public class ExcelDataContextTest extends TestCase {
         assertNotNull(table.getColumnByName(secondColumnName));
         assertNotNull(table.getColumnByName(thirdColumnName));
     }
+
+    public void testDetectingDifferentDataTypesInXls() throws Exception {
+        detectingDataTypes("src/test/resources/different_datatypes.xls");
+    }
+
+    public void testDifferentDataTypesInXlsx() throws Exception {
+        detectingDataTypes("src/test/resources/different_datatypes.xlsx");
+    }
+
+    private void detectingDataTypes(final String file) {
+        final DataContext dataContext = new ExcelDataContext(copyOf(file), new 
ExcelConfiguration(
+                ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
false, true, 19));
+
+        final Schema schema = dataContext.getDefaultSchema();
+        assertEquals(2, schema.getTableCount());
+
+        final Table table = schema.getTables().get(0);
+        assertEquals("INTEGER", table.getColumns().get(0).getName());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(0).getType());
+        assertEquals("TEXT", table.getColumns().get(1).getName());
+        assertEquals(ColumnType.STRING, table.getColumns().get(1).getType());
+        assertEquals("FORMULA", table.getColumns().get(2).getName());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(2).getType());
+        assertEquals("MIXING_DOUBLE_AND_INT", 
table.getColumns().get(3).getName());
+        assertEquals(ColumnType.DOUBLE, table.getColumns().get(3).getType());
+        assertEquals("MIXING_OTHER_DATATYPES", 
table.getColumns().get(4).getName());
+        assertEquals(ColumnType.STRING, table.getColumns().get(4).getType());
+        final DataSet countDataSet = 
dataContext.query().from(table).selectCount().execute();
+        assertTrue(countDataSet.next());
+        assertEquals(20L, countDataSet.getRow().getValue(0));
+        assertFalse(countDataSet.next());
+    }
+
+    public void testCellValueWithWrongDatatypeIsSetToNull() {
+        // Unless Integers and Doubles are mixed all other incorrect values 
will be converted to null with a warning
+        final DataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/different_datatypes.xls"),
+                new 
ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
false, true, 19));
+        final Table table = dataContext.getDefaultSchema().getTables().get(0);
+        final DataSet testWrongDatatypeDataSet = dataContext
+                .query()
+                .from(table)
+                .select("MIXING_DOUBLE_AND_INT")
+                .execute();
+        IntStream.range(0, 20).forEach(i -> 
assertTrue(testWrongDatatypeDataSet.next()));
+        assertNull(testWrongDatatypeDataSet.getRow().getValue(0));
+        assertFalse(testWrongDatatypeDataSet.next());
+    }
+
+    public void testDetectingDataTypeNotSkippingLinesAndColumnsUsingNameLine() 
{
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/skipped_lines.xlsx"),
+                new ExcelConfiguration(6, null, false, false, true, 3));
+        final Table table = dataContext.getDefaultSchema().getTables().get(0);
+        assertEquals(ColumnType.STRING, table.getColumns().get(0).getType());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(6).getType());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(7).getType());
+    }
+
+    public void testDetectingDataTypeNotSkippingLinesAndColumnsNoNameLine() {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/skipped_lines.xlsx"),
+                new ExcelConfiguration(ExcelConfiguration.NO_COLUMN_NAME_LINE, 
null, false, false, true, 3));
+        final Table table = dataContext.getDefaultSchema().getTables().get(0);
+        assertEquals(ColumnType.STRING, table.getColumns().get(0).getType());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(6).getType());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(7).getType());
+    }
+
+    public void testDetectingDataTypeSkippingLinesAndColumnsUsingNameLine() {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/skipped_lines.xlsx"),
+                new 
ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
true, true, 3));
+        final Table table = dataContext.getDefaultSchema().getTables().get(0);
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(0).getType());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(1).getType());
+    }
+
+    public void testDetectingDataTypeSkippingLinesAndColumnsNoNameLine() {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/skipped_lines.xlsx"),
+                new ExcelConfiguration(ExcelConfiguration.NO_COLUMN_NAME_LINE, 
null, true, true, true, 3));
+        final Table table = dataContext.getDefaultSchema().getTables().get(0);
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(0).getType());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(1).getType());
+    }
+
+    public void testToMuchNumberOfLinesToScan() throws Exception {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/skipped_lines.xlsx"),
+                new 
ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
true, true, 4));
+
+        final Table table = dataContext.getDefaultSchema().getTables().get(0);
+        assertEquals("hello", table.getColumns().get(0).getName());
+        assertEquals("world", table.getColumns().get(1).getName());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(0).getType());
+        assertEquals(ColumnType.INTEGER, table.getColumns().get(1).getType());
+        final DataSet dataSet = 
dataContext.query().from(table).selectCount().execute();
+        assertTrue(dataSet.next());
+        assertEquals(3L, dataSet.getRow().getValue(0));
+    }
+
+    public void testToMissingValueDoesntEffectDetectedType() throws Exception {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/xls_missing_values.xls"),
+                new 
ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
false, true, 3));
+
+        final Table table = dataContext.getDefaultSchema().getTables().get(0);
+        final Column columnB = table.getColumns().get(1);
+        assertEquals("b", columnB.getName());
+        assertEquals(ColumnType.INTEGER, columnB.getType());
+        final DataSet dataSetColumnB = 
dataContext.query().from(table).select(columnB).execute();
+        assertTrue(dataSetColumnB.next());
+        assertTrue(dataSetColumnB.next());
+        assertNull(dataSetColumnB.getRow().getValue(0));
+
+        final Column columnD = table.getColumns().get(3);
+        assertEquals("d", columnD.getName());
+        assertEquals(ColumnType.INTEGER, columnD.getType());
+        final DataSet dataSetColumnD = dataContext
+                .query()
+                .from(table)
+                .select(columnD)
+                .where(columnD)
+                .eq(12)
+                .execute();
+        assertTrue(dataSetColumnD.next());
+        assertEquals(12, dataSetColumnD.getRow().getValue(0));
+    }
+
+
+    public void testInsertingValueOfValidColumnType() {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/different_datatypes.xls"),
+                new 
ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
false, true, 19));
+        final Table table = dataContext.getDefaultSchema().getTable(0);
+        dataContext.executeUpdate(new InsertInto(table).value("INTEGER", 123));
+        final DataSet dataSet = 
dataContext.query().from(table).selectAll().where("INTEGER").eq(123).execute();
+        assertTrue(dataSet.next());
+    }
+
+    @Test(expected = MetaModelException.class)
+    public void testInsertingValueOfInvalidColumnType() {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/different_datatypes.xls"),
+                new 
ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
false, true, 19));
+        final Table table = dataContext.getDefaultSchema().getTable(0);
+        dataContext.executeUpdate(new InsertInto(table).value("INTEGER", "this 
is not an integer"));
+    }
+
+    public void testUpdatingValueOfValidColumnType() {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/different_datatypes.xls"),
+                new 
ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
false, true, 19));
+        final Table table = dataContext.getDefaultSchema().getTable(0);
+        dataContext.executeUpdate(new Update(table).value("INTEGER", 
1).value("INTEGER", 123));
+        final DataSet dataSet = 
dataContext.query().from(table).selectAll().where("INTEGER").eq(123).execute();
+        assertTrue(dataSet.next());
+    }
+
+    @Test(expected = MetaModelException.class)
+    public void testUpdatingValueOfInvalidColumnType() {
+        final ExcelDataContext dataContext = new 
ExcelDataContext(copyOf("src/test/resources/different_datatypes.xls"),
+                new 
ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, null, true, 
false, true, 19));
+        final Table table = dataContext.getDefaultSchema().getTable(0);
+        dataContext.executeUpdate(new Update(table).value("INTEGER", 
1).value("INTEGER", "this is not an integer"));
+    }
+
 }
\ No newline at end of file
diff --git a/excel/src/test/resources/different_datatypes.xls 
b/excel/src/test/resources/different_datatypes.xls
new file mode 100644
index 0000000..1cdb28e
Binary files /dev/null and b/excel/src/test/resources/different_datatypes.xls 
differ
diff --git a/excel/src/test/resources/different_datatypes.xlsx 
b/excel/src/test/resources/different_datatypes.xlsx
new file mode 100644
index 0000000..a7be4d0
Binary files /dev/null and b/excel/src/test/resources/different_datatypes.xlsx 
differ

Reply via email to