This is an automated email from the ASF dual-hosted git repository.

volodymyr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit cee1b994a8b96c183e14360f4103583eb8ade475
Author: Volodymyr Vysotskyi <[email protected]>
AuthorDate: Mon Mar 16 20:41:35 2020 +0200

    DRILL-7643: Fix issues with using columns with the same name as a reserved keyword

    closes #2028
---
 .../exec/metastore/analyze/AnalyzeColumnUtils.java    |  2 +-
 .../metastore/analyze/FileMetadataInfoCollector.java  |  1 +
 .../planner/sql/handlers/RefreshMetadataHandler.java  |  6 +++++-
 .../planner/sql/parser/SqlMetastoreAnalyzeTable.java  |  3 +--
 .../store/easy/json/StatisticsCollectorImpl.java      |  1 +
 .../drill/exec/store/ischema/RecordCollector.java     | 20 ++++++++++--------
 .../apache/drill/exec/sql/TestMetastoreCommands.java  | 14 +++++++++++---
 .../exec/store/parquet/TestParquetMetadataCache.java  |  4 ++--
 8 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
index feeeab7..1f97399 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
@@ -51,7 +51,7 @@ public class AnalyzeColumnUtils {
    * Returns actual column name obtained form intermediate name which includes statistics kind and other analyze-specific info.
    * <p>
    * Example: column which corresponds to max statistics value for {@code `o_shippriority`} column is {@code column$maxValue$`o_shippriority`}.
-   * This method will return actual column name: {@code `o_shippriority`}.
+   * This method will return escaped actual column name: {@code `o_shippriority`}.
    *
    * @param fullName the source of actual column name
    * @return actual column name

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java
index d9765d6..cc156f0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java
@@ -128,6 +128,7 @@ public class FileMetadataInfoCollector implements MetadataInfoCollector {
     List<SchemaPath> metastoreInterestingColumns =
         Optional.ofNullable(basicRequests.interestingColumnsAndPartitionKeys(tableInfo).interestingColumns())
             .map(metastoreInterestingColumnNames -> metastoreInterestingColumnNames.stream()
+                // interesting column names are escaped, so SchemaPath.parseFromString() should be used here
                 .map(SchemaPath::parseFromString)
                 .collect(Collectors.toList()))
             .orElse(null);

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java
index 7b6a19a..a2607a3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java
@@ -148,7 +148,11 @@ public class RefreshMetadataHandler extends DefaultSqlHandler {
     if (columnList != null) {
       for (SqlNode column : columnList.getList()) {
         // Add only the root segment. Collect metadata for all the columns under that root segment
-        columnSet.add(SchemaPath.getSimplePath(SchemaPath.parseFromString(column.toString()).getRootSegmentPath()));
+        columnSet.add(
+            SchemaPath.getSimplePath(
+                SchemaPath.parseFromString(
+                    column.toSqlString(null, true).getSql())
+                    .getRootSegmentPath()));
       }
     }
     return columnSet;
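The core of the fix is visible in the hunk above: SqlIdentifier.toString() renders an identifier as its bare dotted name, so a column named after a reserved keyword such as `date` reaches SchemaPath.parseFromString() unescaped, while toSqlString(null, true).getSql() renders it quoted. The following is a minimal sketch of the difference, not part of the commit; the class name is illustrative, and it assumes Calcite's default (null) dialect quotes identifiers with backticks, which is what the fix relies on:

    import org.apache.calcite.sql.SqlIdentifier;
    import org.apache.calcite.sql.parser.SqlParserPos;
    import org.apache.drill.common.expression.SchemaPath;

    public class ReservedKeywordDemo {
      public static void main(String[] args) {
        // A column literally named "date", as the SQL parser would produce it.
        SqlIdentifier column = new SqlIdentifier("date", SqlParserPos.ZERO);

        // Old path: the bare, unquoted name, which the downstream
        // expression parser can mis-handle for a reserved keyword.
        System.out.println(column.toString());                        // date

        // New path: the identifier rendered quoted for the default dialect,
        // so the expression parser sees an escaped name.
        String quoted = column.toSqlString(null, true).getSql();      // `date`

        SchemaPath path = SchemaPath.parseFromString(quoted);
        System.out.println(path.getRootSegmentPath());                // date
      }
    }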
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlMetastoreAnalyzeTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlMetastoreAnalyzeTable.java
index a2bf8a9..a9ab526 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlMetastoreAnalyzeTable.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlMetastoreAnalyzeTable.java
@@ -127,8 +127,7 @@ public class SqlMetastoreAnalyzeTable extends DrillSqlCall {
     }

     return fieldList.getList().stream()
-        .map(SqlNode::toString)
-        .map(SchemaPath::parseFromString)
+        .map(sqlNode -> SchemaPath.parseFromString(sqlNode.toSqlString(null, true).getSql()))
        .collect(Collectors.toList());
   }

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/StatisticsCollectorImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/StatisticsCollectorImpl.java
index f2dcac9..96ef51b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/StatisticsCollectorImpl.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/StatisticsCollectorImpl.java
@@ -240,6 +240,7 @@ public class StatisticsCollectorImpl extends JSONBaseStatisticsRecordWriter {
     }
     switch (nextField) {
       case Statistic.COLNAME:
+        // column name is escaped, so SchemaPath.parseFromString() should be used here
         ((DrillStatsTable.ColumnStatistics_v1) columnStatistics).setName(SchemaPath.parseFromString(reader.readText().toString()));
         break;
       case Statistic.COLTYPE:
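Both comments added above make the same point: a name that arrives escaped must go through SchemaPath.parseFromString(), which runs the expression parser and strips the backticks, whereas SchemaPath.getSimplePath() treats its argument as the literal name. A rough sketch of the distinction (standalone, illustrative only; the values in the comments are what the two factory methods are expected to produce):

    import org.apache.drill.common.expression.SchemaPath;

    public class EscapedNameDemo {
      public static void main(String[] args) {
        String escaped = "`date`";

        // parseFromString() parses the string as an expression:
        // the backticks are quoting, not part of the name.
        SchemaPath parsed = SchemaPath.parseFromString(escaped);
        System.out.println(parsed.getRootSegmentPath());   // date

        // getSimplePath() keeps the string as-is, so the backticks
        // would become literal characters of the column name.
        SchemaPath literal = SchemaPath.getSimplePath(escaped);
        System.out.println(literal.getRootSegmentPath());  // `date`
      }
    }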
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java
index a672d19..da4f474 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java
@@ -23,6 +23,7 @@ import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.schema.Schema;
 import org.apache.calcite.schema.SchemaPlus;
 import org.apache.calcite.schema.Table;
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.exec.ExecConstants;
@@ -318,7 +319,7 @@ public interface RecordCollector {
      * @param schemaPath schema name
      * @param table table instance
      * @param schema table or column schema
-     * @param parentColumnName parent column name if any
+     * @param parentColumnNames list of parent column names if any
      * @param columnIndex column index if any
      * @param isNested indicates if column is nested
      * @return list of column records
@@ -326,28 +327,31 @@ public interface RecordCollector {
     private List<Records.Column> columns(String schemaPath,
                                          BaseTableMetadata table,
                                          TupleMetadata schema,
-                                         String parentColumnName,
+                                         List<String> parentColumnNames,
                                          int columnIndex,
                                          boolean isNested) {
       List<Records.Column> records = new ArrayList<>();
       schema.toMetadataList().forEach(
         column -> {
-          // concat parent column name to use full column name, i.e. struct_col.nested_col
-          String columnName = parentColumnName == null ? column.name() : parentColumnName + "." + column.name();
+          List<String> columnNames = CollectionUtils.isEmpty(parentColumnNames) ? new ArrayList<>() : new ArrayList<>(parentColumnNames);
+          columnNames.add(column.name());
           // nested columns have the same index as their parent
           int currentIndex = columnIndex == UNDEFINED_INDEX ? schema.index(column.name()) : columnIndex;
           // if column is a map / struct, recursively scan nested columns
           if (column.isMap()) {
             List<Records.Column> mapRecords =
-              columns(schemaPath, table, column.tupleSchema(), columnName, currentIndex, true);
+              columns(schemaPath, table, column.tupleSchema(), columnNames, currentIndex, true);
             records.addAll(mapRecords);
           }

           String tableName = table.getTableInfo().name();
-          if (filterEvaluator.shouldVisitColumn(schemaPath, tableName, columnName)) {
+
+          // concat parent column names to use full column name, i.e. struct_col.nested_col
+          String columnPath = String.join(".", columnNames);
+          if (filterEvaluator.shouldVisitColumn(schemaPath, tableName, columnPath)) {
             ColumnStatistics<?> columnStatistics =
-              table.getColumnStatistics(SchemaPath.parseFromString(columnName));
-            records.add(new Records.Column(IS_CATALOG_NAME, schemaPath, tableName, columnName,
+              table.getColumnStatistics(SchemaPath.getCompoundPath(columnNames.toArray(new String[0])));
+            records.add(new Records.Column(IS_CATALOG_NAME, schemaPath, tableName, columnPath,
               column, columnStatistics, currentIndex, isNested));
           }
         });
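The switch from a dot-concatenated parent name to a List<String> of segments follows the same logic: once segments are joined into "struct_col.nested_col" and re-parsed, a segment that is a keyword (or itself contains a dot) can no longer be recovered reliably. Keeping the segments separate and calling SchemaPath.getCompoundPath() builds the path without a round trip through the parser, while the joined string is still used for display. A small sketch under that reading, again illustrative rather than part of the commit:

    import java.util.Arrays;
    import java.util.List;

    import org.apache.drill.common.expression.SchemaPath;

    public class NestedColumnDemo {
      public static void main(String[] args) {
        // Segments collected while walking a map/struct column, as columns() does above.
        List<String> columnNames = Arrays.asList("struct_col", "date");

        // Statistics lookup: build the path segment by segment, no re-parsing.
        SchemaPath statsKey = SchemaPath.getCompoundPath(columnNames.toArray(new String[0]));

        // INFORMATION_SCHEMA output: the display name is the joined form.
        String columnPath = String.join(".", columnNames);
        System.out.println(columnPath);  // struct_col.date
      }
    }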
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
index 9c63da5..92472cf 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
@@ -3227,21 +3227,21 @@ public class TestMetastoreCommands extends ClusterTest {
   public void testAnalyzeEmptyRequiredParquetTable() throws Exception {
     String tableName = "analyze_empty_simple_required";

-    run("create table dfs.tmp.%s as select 1 as id, 'a' as name from (values(1)) where 1 = 2", tableName);
+    run("create table dfs.tmp.%s as select 1 as `date`, 'a' as name from (values(1)) where 1 = 2", tableName);

     File table = new File(dirTestWatcher.getDfsTestTmpDir(), tableName);

     TableInfo tableInfo = getTableInfo(tableName, "tmp");

     TupleMetadata schema = new SchemaBuilder()
-        .add("id", TypeProtos.MinorType.INT)
+        .add("date", TypeProtos.MinorType.INT)
         .add("name", TypeProtos.MinorType.VARCHAR)
         .build();

     Map<SchemaPath, ColumnStatistics<?>> columnStatistics = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
         .put(SchemaPath.getSimplePath("name"),
             getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
-        .put(SchemaPath.getSimplePath("id"),
+        .put(SchemaPath.getSimplePath("date"),
             getColumnStatistics(null, null, 0L, TypeProtos.MinorType.INT))
         .build();
@@ -3300,6 +3300,14 @@ public class TestMetastoreCommands extends ClusterTest {
           .rowGroupsMetadata(tableInfo, (String) null, null);

       assertEquals(1, rowGroupsMetadata.size());
+
+      testBuilder()
+          .sqlQuery("select COLUMN_NAME from INFORMATION_SCHEMA.`COLUMNS` where table_name='%s'", tableName)
+          .unOrdered()
+          .baselineColumns("COLUMN_NAME")
+          .baselineValues("date")
+          .baselineValues("name")
+          .go();
     } finally {
       run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
       run("drop table if exists dfs.tmp.`%s`", tableName);

diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
index e7e9c0c..2ecc3b5 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
@@ -944,9 +944,9 @@ public class TestParquetMetadataCache extends PlanTestBase {
   @Test
   public void testRefreshWithColumns() throws Exception {
-    test("refresh table metadata columns (o_custkey, o_orderdate) dfs.`%s`", TABLE_NAME_1);
+    test("refresh table metadata columns (`date`, o_orderdate) dfs.`%s`", TABLE_NAME_1);
     checkForMetadataFile(TABLE_NAME_1);
-    String query = String.format("select dir0, dir1, o_custkey, o_orderdate from dfs.`%s` " +
+    String query = String.format("select dir0, dir1, o_custkey as `date`, o_orderdate from dfs.`%s` " +
         " where dir0=1994 and dir1 in ('Q1', 'Q2')", TABLE_NAME_1);
     int expectedRowCount = 20;
     int actualRowCount = testSql(query);
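Taken together, the test changes rename plain columns to the reserved keyword `date` and then assert that metadata collection, REFRESH TABLE METADATA, and INFORMATION_SCHEMA all handle it. A condensed, hypothetical test in the same style (not from the commit; it assumes the ClusterTest helpers run() and testBuilder() used in TestMetastoreCommands above, and the table name is made up):

    @Test
    public void testReservedKeywordColumn() throws Exception {
      String tableName = "reserved_keyword_column";
      run("create table dfs.tmp.%s as select 1 as `date`, 'a' as name from (values(1))", tableName);
      try {
        // ANALYZE must accept the quoted keyword as an interesting column ...
        run("analyze table dfs.tmp.`%s` columns(`date`) refresh metadata", tableName);

        // ... and INFORMATION_SCHEMA must report it with the quoting stripped.
        testBuilder()
            .sqlQuery("select COLUMN_NAME from INFORMATION_SCHEMA.`COLUMNS` where table_name='%s'", tableName)
            .unOrdered()
            .baselineColumns("COLUMN_NAME")
            .baselineValues("date")
            .baselineValues("name")
            .go();
      } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        run("drop table if exists dfs.tmp.`%s`", tableName);
      }
    }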
