This is an automated email from the ASF dual-hosted git repository. cgivre pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 5632dea97f2fb016954567877124f89a2fdfd440 Author: Charles Givre <[email protected]> AuthorDate: Wed Oct 21 11:42:56 2020 -0400 Initial commit --- contrib/format-excel/pom.xml | 2 +- .../drill/exec/store/excel/ExcelBatchReader.java | 4 ++- .../drill/exec/store/excel/TestExcelFormat.java | 41 ++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/contrib/format-excel/pom.xml b/contrib/format-excel/pom.xml index 7399266..39572a9 100644 --- a/contrib/format-excel/pom.xml +++ b/contrib/format-excel/pom.xml @@ -67,7 +67,7 @@ <dependency> <groupId>com.github.pjfanning</groupId> <artifactId>excel-streaming-reader</artifactId> - <version>2.3.3</version> + <version>2.3.5</version> </dependency> </dependencies> <build> diff --git a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java index c60b163..1df4071 100644 --- a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java +++ b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java @@ -288,6 +288,8 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> { break; case FORMULA: case NUMERIC: + case _NONE: + case BLANK: tempColumnName = cell.getStringCellValue(); makeColumn(builder, tempColumnName, TypeProtos.MinorType.FLOAT8); excelFieldNames.add(colPosition, tempColumnName); @@ -457,7 +459,7 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> { } else if (cellType == CellType.NUMERIC && DateUtil.isCellDateFormatted(cell)) { // Case if the column is a date or time addColumnToArray(rowWriter, excelFieldNames.get(colPosition), MinorType.TIMESTAMP, false); - } else if (cellType == CellType.NUMERIC || cellType == CellType.FORMULA) { + } else if (cellType == CellType.NUMERIC || cellType == CellType.FORMULA || cellType == CellType.BLANK || cellType == CellType._NONE) { // Case if the column is numeric addColumnToArray(rowWriter, excelFieldNames.get(colPosition), MinorType.FLOAT8, false); } else { diff --git a/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java b/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java index 387d708..934f78c 100644 --- a/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java +++ b/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java @@ -21,6 +21,7 @@ package org.apache.drill.exec.store.excel; import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.rowSet.RowSet; import org.apache.drill.exec.physical.rowSet.RowSetBuilder; import org.apache.drill.exec.record.metadata.SchemaBuilder; @@ -431,4 +432,44 @@ public class TestExcelFormat extends ClusterTest { .include("Limit", "maxRecords=5") .match(); } + + @Test + public void testBlankColumnFix() throws Exception { + String sql = "SELECT * FROM dfs.`excel/zips-small.xlsx`"; + + RowSet results = client.queryBuilder().sql(sql).rowSet(); + TupleMetadata expectedSchema = new SchemaBuilder() + .addNullable("zip", MinorType.FLOAT8) + .addNullable("lat", MinorType.FLOAT8) + .addNullable("lng", MinorType.FLOAT8) + .addNullable("city", MinorType.VARCHAR) + .addNullable("state_id", MinorType.VARCHAR) + .addNullable("state_name", MinorType.VARCHAR) + .addNullable("zcta", MinorType.VARCHAR) + .addNullable("parent_zcta", MinorType.FLOAT8) + .addNullable("population", MinorType.FLOAT8) + .addNullable("density", MinorType.FLOAT8) + .addNullable("county_fips", MinorType.FLOAT8) + .addNullable("county_name", MinorType.VARCHAR) + .addNullable("county_weights", MinorType.VARCHAR) + .addNullable("county_names_all", MinorType.VARCHAR) + .addNullable("county_fips_all", MinorType.VARCHAR) + .addNullable("imprecise", MinorType.VARCHAR) + .addNullable("military", MinorType.VARCHAR) + .addNullable("timezone", MinorType.VARCHAR) + .buildSchema(); + + RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema) + .addRow(601.0, 18.18004, -66.75218, "Adjuntas", "PR", "Puerto Rico", "TRUE", 0.0, 17242.0, 111.4, 72001.0, "Adjuntas", "{'72001':99.43,'72141':0.57}", "Adjuntas|Utuado", + "72001|72141", "FALSE", "FALSE", "America/Puerto_Rico") + .addRow(602.0, 18.36073, -67.17517, "Aguada", "PR", "Puerto Rico", "TRUE", 0.0, 38442.0, 523.5, 72003.0, "Aguada", "{'72003':100}", "Aguada", "72003", "FALSE", "FALSE", "America" + + "/Puerto_Rico") + .addRow(603.0, 18.45439, -67.12202, "Aguadilla", "PR", "Puerto Rico", "TRUE", 0.0, 48814.0, 667.9, 72005.0, "Aguadilla", "{'72005':100}", "Aguadilla", "72005", "FALSE", "FALSE", + "America/Puerto_Rico") + .addRow(606.0, 18.16724, -66.93828, "Maricao", "PR", "Puerto Rico", "TRUE", 0.0, 6437.0, 60.4, 72093.0, "Maricao", "{'72093':94.88,'72121':1.35,'72153':3.78}", "Maricao|Yauco" + + "|Sabana Grande", "72093|72153|72121", "FALSE", "FALSE", "America/Puerto_Rico") + .build(); + + new RowSetComparison(expected).verifyAndClearAll(results); + } }
