This is an automated email from the ASF dual-hosted git repository. dzamo pushed a commit to branch 1.20 in repository https://gitbox.apache.org/repos/asf/drill.git
commit e6f67d254191d51b5a1c5615dbeef92798ef04d2 Author: James Turton <[email protected]> AuthorDate: Thu Aug 18 02:12:44 2022 +0800 DRILL-8280: Cannot ANALYZE files containing non-ASCII column names (#2625) --- .../drill/exec/expr/fn/impl/SchemaFunctions.java | 5 +- .../drill/exec/sql/TestMetastoreCommands.java | 193 ++++++++++++--------- 2 files changed, 116 insertions(+), 82 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SchemaFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SchemaFunctions.java index 7bce81be08..a71447eb4a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SchemaFunctions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SchemaFunctions.java @@ -151,8 +151,9 @@ public class SchemaFunctions { } org.apache.drill.exec.record.metadata.TupleMetadata currentSchema = - org.apache.drill.exec.expr.fn.impl.SchemaFunctions.getTupleMetadata( - org.apache.drill.common.util.DrillStringUtils.toBinaryString(input.buffer, input.start, input.end)); + org.apache.drill.exec.expr.fn.impl.SchemaFunctions.getTupleMetadata( + org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer)); + if (schemaHolder.obj == null) { schemaHolder.obj = currentSchema; return; diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java index 75f90868e4..7f42403c9f 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java @@ -64,10 +64,12 @@ import java.nio.file.Paths; import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneId; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -100,85 +102,87 @@ public class TestMetastoreCommands extends ClusterTest { .build(); public static final Map<SchemaPath, ColumnStatistics<?>> TABLE_COLUMN_STATISTICS = - ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder() - .put(SchemaPath.getSimplePath("o_shippriority"), - getColumnStatistics(0, 0, 120L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("o_orderstatus"), - getColumnStatistics("F", "P", 120L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderpriority"), - getColumnStatistics("1-URGENT", "5-LOW", 120L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderkey"), - getColumnStatistics(1, 1319, 120L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("o_clerk"), - getColumnStatistics("Clerk#000000004", "Clerk#000000995", 120L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_totalprice"), - getColumnStatistics(3266.69, 350110.21, 120L, TypeProtos.MinorType.FLOAT8)) - .put(SchemaPath.getSimplePath("o_comment"), + new LinkedHashMap<SchemaPath, ColumnStatistics<?>>() + {{ + put(SchemaPath.getSimplePath("o_shippriority"), + getColumnStatistics(0, 0, 120L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("o_orderstatus"), + getColumnStatistics("F", "P", 120L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderpriority"), + getColumnStatistics("1-URGENT", "5-LOW", 120L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderkey"), + getColumnStatistics(1, 1319, 120L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("o_clerk"), + getColumnStatistics("Clerk#000000004", "Clerk#000000995", 120L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_totalprice"), + getColumnStatistics(3266.69, 350110.21, 120L, TypeProtos.MinorType.FLOAT8)); + put(SchemaPath.getSimplePath("o_comment"), getColumnStatistics(" about the final platelets. dependen", - "zzle. carefully enticing deposits nag furio", 120L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_custkey"), - getColumnStatistics(25, 1498, 120L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("dir0"), - getColumnStatistics("1994", "1996", 120L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("dir1"), - getColumnStatistics("Q1", "Q4", 120L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderdate"), - getColumnStatistics(757382400000L, 850953600000L, 120L, TypeProtos.MinorType.DATE)) - .build(); + "zzle. carefully enticing deposits nag furio", 120L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_custkey"), + getColumnStatistics(25, 1498, 120L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("dir0"), + getColumnStatistics("1994", "1996", 120L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("dir1"), + getColumnStatistics("Q1", "Q4", 120L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderdate"), + getColumnStatistics(757382400000L, 850953600000L, 120L, TypeProtos.MinorType.DATE)); + }}; public static final Map<SchemaPath, ColumnStatistics<?>> DIR0_1994_SEGMENT_COLUMN_STATISTICS = - ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder() - .put(SchemaPath.getSimplePath("o_shippriority"), - getColumnStatistics(0, 0, 40L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("o_orderstatus"), - getColumnStatistics("F", "F", 40L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderpriority"), - getColumnStatistics("1-URGENT", "5-LOW", 40L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderkey"), - getColumnStatistics(5, 1031, 40L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("o_clerk"), - getColumnStatistics("Clerk#000000004", "Clerk#000000973", 40L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_totalprice"), - getColumnStatistics(3266.69, 350110.21, 40L, TypeProtos.MinorType.FLOAT8)) - .put(SchemaPath.getSimplePath("o_comment"), + new LinkedHashMap<SchemaPath, ColumnStatistics<?>>() + {{ + put(SchemaPath.getSimplePath("o_shippriority"), + getColumnStatistics(0, 0, 40L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("o_orderstatus"), + getColumnStatistics("F", "F", 40L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderpriority"), + getColumnStatistics("1-URGENT", "5-LOW", 40L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderkey"), + getColumnStatistics(5, 1031, 40L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("o_clerk"), + getColumnStatistics("Clerk#000000004", "Clerk#000000973", 40L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_totalprice"), + getColumnStatistics(3266.69, 350110.21, 40L, TypeProtos.MinorType.FLOAT8)); + put(SchemaPath.getSimplePath("o_comment"), getColumnStatistics(" accounts nag slyly. ironic, ironic accounts wake blithel", - "yly final requests over the furiously regula", 40L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_custkey"), - getColumnStatistics(25, 1469, 40L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("dir0"), - getColumnStatistics("1994", "1994", 40L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("dir1"), - getColumnStatistics("Q1", "Q4", 40L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderdate"), - getColumnStatistics(757382400000L, 788140800000L, 40L, TypeProtos.MinorType.DATE)) - .build(); + "yly final requests over the furiously regula", 40L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_custkey"), + getColumnStatistics(25, 1469, 40L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("dir0"), + getColumnStatistics("1994", "1994", 40L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("dir1"), + getColumnStatistics("Q1", "Q4", 40L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderdate"), + getColumnStatistics(757382400000L, 788140800000L, 40L, TypeProtos.MinorType.DATE)); + }}; public static final Map<SchemaPath, ColumnStatistics<?>> DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS = - ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder() - .put(SchemaPath.getSimplePath("o_shippriority"), - getColumnStatistics(0, 0, 10L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("o_orderstatus"), - getColumnStatistics("F", "F", 10L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderpriority"), - getColumnStatistics("1-URGENT", "5-LOW", 10L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderkey"), - getColumnStatistics(66, 833, 10L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("o_clerk"), - getColumnStatistics("Clerk#000000062", "Clerk#000000973", 10L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_totalprice"), - getColumnStatistics(3266.69, 132531.73, 10L, TypeProtos.MinorType.FLOAT8)) - .put(SchemaPath.getSimplePath("o_comment"), + new LinkedHashMap<SchemaPath, ColumnStatistics<?>>() {{ + put(SchemaPath.getSimplePath("o_shippriority"), + getColumnStatistics(0, 0, 10L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("o_orderstatus"), + getColumnStatistics("F", "F", 10L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderpriority"), + getColumnStatistics("1-URGENT", "5-LOW", 10L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderkey"), + getColumnStatistics(66, 833, 10L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("o_clerk"), + getColumnStatistics("Clerk#000000062", "Clerk#000000973", 10L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_totalprice"), + getColumnStatistics(3266.69, 132531.73, 10L, TypeProtos.MinorType.FLOAT8)); + put(SchemaPath.getSimplePath("o_comment"), getColumnStatistics(" special pinto beans use quickly furiously even depende", - "y pending requests integrate", 10L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_custkey"), - getColumnStatistics(392, 1411, 10L, TypeProtos.MinorType.INT)) - .put(SchemaPath.getSimplePath("dir0"), - getColumnStatistics("1994", "1994", 10L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("dir1"), - getColumnStatistics("Q1", "Q1", 10L, TypeProtos.MinorType.VARCHAR)) - .put(SchemaPath.getSimplePath("o_orderdate"), - getColumnStatistics(757382400000L, 764640000000L, 10L, TypeProtos.MinorType.DATE)) - .build(); + "y pending requests integrate", 10L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_custkey"), + getColumnStatistics(392, 1411, 10L, TypeProtos.MinorType.INT)); + put(SchemaPath.getSimplePath("dir0"), + getColumnStatistics("1994", "1994", 10L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("dir1"), + getColumnStatistics("Q1", "Q1", 10L, TypeProtos.MinorType.VARCHAR)); + put(SchemaPath.getSimplePath("o_orderdate"), + getColumnStatistics(757382400000L, 764640000000L, 10L, TypeProtos.MinorType.DATE)); + }}; public static final MetadataInfo TABLE_META_INFO = MetadataInfo.builder() .type(MetadataType.TABLE) @@ -3564,15 +3568,44 @@ public class TestMetastoreCommands extends ClusterTest { } } - public static <T> ColumnStatistics<T> getColumnStatistics(T minValue, T maxValue, - long rowCount, TypeProtos.MinorType minorType) { + @Test // DRILL-8280 + public void testNonAsciiColumnName() throws Exception { + String tableName = "utf8_col_name"; + String colName = "Käse"; + + run("create table dfs.tmp.%s as select 'Cheddar' as `%s`", tableName, colName); + try { + testBuilder() + .sqlQuery("analyze table dfs.tmp.`%s` refresh metadata", tableName) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)) + .go(); + String query = "select column_name from information_schema.`columns` where table_name='%s' and column_name='%s'"; + + testBuilder() + .sqlQuery(query, tableName, colName) + .unOrdered() + .baselineColumns("column_name") + .baselineValues(colName) + .go(); + } finally { + run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName); + run("drop table if exists dfs.tmp.`%s`", tableName); + } + } + + + public static <T> ColumnStatistics<T> getColumnStatistics(T minValue, T maxValue, long rowCount, + TypeProtos.MinorType minorType) { return new ColumnStatistics<>( - Arrays.asList( - new StatisticsHolder<>(minValue, ColumnStatisticsKind.MIN_VALUE), - new StatisticsHolder<>(maxValue, ColumnStatisticsKind.MAX_VALUE), - new StatisticsHolder<>(rowCount, TableStatisticsKind.ROW_COUNT), - new StatisticsHolder<>(rowCount, ColumnStatisticsKind.NON_NULL_VALUES_COUNT), - new StatisticsHolder<>(0L, ColumnStatisticsKind.NULLS_COUNT)), + new ArrayList() {{ + add(new StatisticsHolder<>(minValue, ColumnStatisticsKind.MIN_VALUE)); + add(new StatisticsHolder<>(maxValue, ColumnStatisticsKind.MAX_VALUE)); + add(new StatisticsHolder<>(rowCount, TableStatisticsKind.ROW_COUNT)); + add(new StatisticsHolder<>(rowCount, ColumnStatisticsKind.NON_NULL_VALUES_COUNT)); + add(new StatisticsHolder<>(0L, ColumnStatisticsKind.NULLS_COUNT)); + }}, minorType); }
