HIVE-20483 Really move metastore common classes into metastore-common (Alexander Kolbasov via Alan Gates).
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/35f86c74 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/35f86c74 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/35f86c74 Branch: refs/heads/master Commit: 35f86c749cefc2a9972a991deed78a1c3719093d Parents: 2111165 Author: Alan Gates <[email protected]> Authored: Thu Sep 13 12:38:43 2018 -0700 Committer: Alan Gates <[email protected]> Committed: Thu Sep 13 12:38:43 2018 -0700 ---------------------------------------------------------------------- beeline/pom.xml | 5 + hcatalog/hcatalog-pig-adapter/pom.xml | 7 + hcatalog/pom.xml | 7 + metastore/pom.xml | 5 - ql/pom.xml | 5 + .../hadoop/hive/common/StatsSetupConst.java | 336 ++ .../hadoop/hive/metastore/ColumnType.java | 301 ++ .../hive/metastore/FileMetadataHandler.java | 109 + .../hive/metastore/HiveMetaStoreClient.java | 3637 +++++++++++++++++ .../hadoop/hive/metastore/IMetaStoreClient.java | 3761 ++++++++++++++++++ .../hadoop/hive/metastore/MetaStoreFS.java | 43 + .../hadoop/hive/metastore/MetadataStore.java | 52 + .../hive/metastore/MetastoreTaskThread.java | 38 + .../metastore/PartitionExpressionProxy.java | 73 + .../hive/metastore/ReplChangeManager.java | 502 +++ .../hive/metastore/RetryingMetaStoreClient.java | 341 ++ .../hadoop/hive/metastore/TableIterable.java | 115 + .../apache/hadoop/hive/metastore/Warehouse.java | 759 ++++ .../hive/metastore/conf/MetastoreConf.java | 1713 ++++++++ .../hive/metastore/conf/TimeValidator.java | 67 + .../spec/CompositePartitionSpecProxy.java | 258 ++ .../spec/PartitionListComposingSpecProxy.java | 209 + .../partition/spec/PartitionSpecProxy.java | 220 + .../spec/PartitionSpecWithSharedSDProxy.java | 192 + .../security/DelegationTokenIdentifier.java | 52 + .../security/DelegationTokenSecretManager.java | 134 + .../security/DelegationTokenSelector.java | 33 + .../security/HadoopThriftAuthBridge.java | 700 ++++ 
.../security/HadoopThriftAuthBridge23.java | 114 + .../hadoop/hive/metastore/utils/FileUtils.java | 597 +++ .../hadoop/hive/metastore/utils/HdfsUtils.java | 395 ++ .../hive/metastore/utils/MetaStoreUtils.java | 908 +++++ .../hive/metastore/utils/SecurityUtils.java | 270 ++ .../hadoop/hive/common/StatsSetupConst.java | 336 -- .../hadoop/hive/metastore/ColumnType.java | 301 -- .../hive/metastore/FileMetadataHandler.java | 109 - .../hive/metastore/HiveMetaStoreClient.java | 3637 ----------------- .../hadoop/hive/metastore/IMetaStoreClient.java | 3761 ------------------ .../hadoop/hive/metastore/MetaStoreFS.java | 43 - .../hadoop/hive/metastore/MetadataStore.java | 52 - .../hive/metastore/MetastoreTaskThread.java | 38 - .../metastore/PartitionExpressionProxy.java | 73 - .../hive/metastore/ReplChangeManager.java | 502 --- .../hive/metastore/RetryingMetaStoreClient.java | 341 -- .../hadoop/hive/metastore/TableIterable.java | 115 - .../apache/hadoop/hive/metastore/Warehouse.java | 759 ---- .../hive/metastore/conf/MetastoreConf.java | 1713 -------- .../hive/metastore/conf/TimeValidator.java | 67 - .../spec/CompositePartitionSpecProxy.java | 258 -- .../spec/PartitionListComposingSpecProxy.java | 209 - .../partition/spec/PartitionSpecProxy.java | 220 - .../spec/PartitionSpecWithSharedSDProxy.java | 192 - .../security/DelegationTokenIdentifier.java | 52 - .../security/DelegationTokenSecretManager.java | 134 - .../security/DelegationTokenSelector.java | 33 - .../security/HadoopThriftAuthBridge.java | 700 ---- .../security/HadoopThriftAuthBridge23.java | 114 - .../hadoop/hive/metastore/utils/FileUtils.java | 597 --- .../hadoop/hive/metastore/utils/HdfsUtils.java | 395 -- .../hive/metastore/utils/MetaStoreUtils.java | 908 ----- .../hive/metastore/utils/SecurityUtils.java | 270 -- streaming/pom.xml | 6 + 62 files changed, 15959 insertions(+), 15934 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/hive/blob/35f86c74/beeline/pom.xml ---------------------------------------------------------------------- diff --git a/beeline/pom.xml b/beeline/pom.xml index 4567d5e..19ec53e 100644 --- a/beeline/pom.xml +++ b/beeline/pom.xml @@ -55,6 +55,11 @@ <artifactId>hive-jdbc</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-standalone-metastore-server</artifactId> + <version>${project.version}</version> + </dependency> <!-- inter-project --> <dependency> <groupId>commons-cli</groupId> http://git-wip-us.apache.org/repos/asf/hive/blob/35f86c74/hcatalog/hcatalog-pig-adapter/pom.xml ---------------------------------------------------------------------- diff --git a/hcatalog/hcatalog-pig-adapter/pom.xml b/hcatalog/hcatalog-pig-adapter/pom.xml index a1c8ddf..c026835 100644 --- a/hcatalog/hcatalog-pig-adapter/pom.xml +++ b/hcatalog/hcatalog-pig-adapter/pom.xml @@ -137,6 +137,13 @@ <scope>test</scope> </dependency> <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-standalone-metastore-server</artifactId> + <version>4.0.0-SNAPSHOT</version> + <classifier>tests</classifier> + <scope>test</scope> + </dependency> + <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-common</artifactId> <version>${hadoop.version}</version> http://git-wip-us.apache.org/repos/asf/hive/blob/35f86c74/hcatalog/pom.xml ---------------------------------------------------------------------- diff --git a/hcatalog/pom.xml b/hcatalog/pom.xml index 4894e9a..91d7daf 100644 --- a/hcatalog/pom.xml +++ b/hcatalog/pom.xml @@ -66,6 +66,13 @@ <scope>test</scope> </dependency> <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-standalone-metastore-server</artifactId> + <version>4.0.0-SNAPSHOT</version> + <classifier>tests</classifier> + <scope>test</scope> + </dependency> + <dependency> <groupId>org.apache.pig</groupId> 
<artifactId>pig</artifactId> <version>${pig.version}</version> http://git-wip-us.apache.org/repos/asf/hive/blob/35f86c74/metastore/pom.xml ---------------------------------------------------------------------- diff --git a/metastore/pom.xml b/metastore/pom.xml index 7f751a4..a75ab97 100644 --- a/metastore/pom.xml +++ b/metastore/pom.xml @@ -49,11 +49,6 @@ <artifactId>hive-standalone-metastore-common</artifactId> <version>${project.version}</version> </dependency> - <dependency> - <groupId>org.apache.hive</groupId> - <artifactId>hive-standalone-metastore-server</artifactId> - <version>${project.version}</version> - </dependency> <dependency> <groupId>javolution</groupId> <artifactId>javolution</artifactId> http://git-wip-us.apache.org/repos/asf/hive/blob/35f86c74/ql/pom.xml ---------------------------------------------------------------------- diff --git a/ql/pom.xml b/ql/pom.xml index a55cbe3..d73deba 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -448,6 +448,11 @@ <artifactId>opencsv</artifactId> <version>${opencsv.version}</version> </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-standalone-metastore-server</artifactId> + <version>${project.version}</version> + </dependency> <!-- test intra-project --> <dependency> <groupId>org.apache.hive</groupId> http://git-wip-us.apache.org/repos/asf/hive/blob/35f86c74/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java new file mode 100644 index 0000000..35be3c4 --- /dev/null +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import com.google.common.collect.ImmutableList; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + + +/** 
+ * A class that defines the constant strings used by the statistics implementation. + */ + +public class StatsSetupConst { + + protected static final Logger LOG = LoggerFactory.getLogger(StatsSetupConst.class.getName()); + + public enum StatDB { + fs { + @Override + public String getPublisher(Configuration conf) { + return "org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher"; + } + + @Override + public String getAggregator(Configuration conf) { + return "org.apache.hadoop.hive.ql.stats.fs.FSStatsAggregator"; + } + }, + custom { + @Override + public String getPublisher(Configuration conf) { + return MetastoreConf.getVar(conf, ConfVars.STATS_DEFAULT_PUBLISHER); } + @Override + public String getAggregator(Configuration conf) { + return MetastoreConf.getVar(conf, ConfVars.STATS_DEFAULT_AGGREGATOR); } + }; + public abstract String getPublisher(Configuration conf); + public abstract String getAggregator(Configuration conf); + } + + // statistics stored in metastore + /** + * The name of the statistic Num Files to be published or gathered. + */ + public static final String NUM_FILES = "numFiles"; + + /** + * The name of the statistic Num Partitions to be published or gathered. + */ + public static final String NUM_PARTITIONS = "numPartitions"; + + /** + * The name of the statistic Total Size to be published or gathered. + */ + public static final String TOTAL_SIZE = "totalSize"; + + /** + * The name of the statistic Row Count to be published or gathered. + */ + public static final String ROW_COUNT = "numRows"; + + public static final String RUN_TIME_ROW_COUNT = "runTimeNumRows"; + + /** + * The name of the statistic Raw Data Size to be published or gathered. + */ + public static final String RAW_DATA_SIZE = "rawDataSize"; + + /** + * The name of the statistic for Number of Erasure Coded Files - to be published or gathered. + */ + public static final String NUM_ERASURE_CODED_FILES = "numFilesErasureCoded"; + + /** + * Temp dir for writing stats from tasks. 
+ */ + public static final String STATS_TMP_LOC = "hive.stats.tmp.loc"; + + public static final String STATS_FILE_PREFIX = "tmpstats-"; + /** + * List of all supported statistics + */ + public static final List<String> SUPPORTED_STATS = ImmutableList.of( + NUM_FILES, ROW_COUNT, TOTAL_SIZE, RAW_DATA_SIZE, NUM_ERASURE_CODED_FILES); + + /** + * List of all statistics that need to be collected during query execution. These are + * statistics that inherently require a scan of the data. + */ + public static final List<String> STATS_REQUIRE_COMPUTE = ImmutableList.of(ROW_COUNT, RAW_DATA_SIZE); + + /** + * List of statistics that can be collected quickly without requiring a scan of the data. + */ + public static final List<String> FAST_STATS = ImmutableList.of( + NUM_FILES, TOTAL_SIZE, NUM_ERASURE_CODED_FILES); + + // This string constant is used to indicate to AlterHandler that + // alterPartition/alterTable is happening via statsTask or via user. + public static final String STATS_GENERATED = "STATS_GENERATED"; + + public static final String TASK = "TASK"; + + public static final String USER = "USER"; + + // This string constant is used by AlterHandler to figure out that it should not attempt to + // update stats. It is set by any client-side task which wishes to signal that no stats + // update should take place, such as with replication. + public static final String DO_NOT_UPDATE_STATS = "DO_NOT_UPDATE_STATS"; + + //This string constant will be persisted in metastore to indicate whether corresponding + //table or partition's statistics and table or partition's column statistics are accurate or not. 
+ public static final String COLUMN_STATS_ACCURATE = "COLUMN_STATS_ACCURATE"; + + public static final String COLUMN_STATS = "COLUMN_STATS"; + + public static final String BASIC_STATS = "BASIC_STATS"; + + public static final String CASCADE = "CASCADE"; + + public static final String TRUE = "true"; + + public static final String FALSE = "false"; + + // The parameter keys for the table statistics. Those keys are excluded from 'show create table' command output. + public static final List<String> TABLE_PARAMS_STATS_KEYS = ImmutableList.of( + COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS, + NUM_ERASURE_CODED_FILES); + + private static class ColumnStatsAccurate { + private static ObjectReader objectReader; + private static ObjectWriter objectWriter; + + static { + ObjectMapper objectMapper = new ObjectMapper(); + objectReader = objectMapper.readerFor(ColumnStatsAccurate.class); + objectWriter = objectMapper.writerFor(ColumnStatsAccurate.class); + } + + static class BooleanSerializer extends JsonSerializer<Boolean> { + + @Override + public void serialize(Boolean value, JsonGenerator jsonGenerator, + SerializerProvider serializerProvider) throws IOException { + jsonGenerator.writeString(value.toString()); + } + } + + static class BooleanDeserializer extends JsonDeserializer<Boolean> { + + public Boolean deserialize(JsonParser jsonParser, + DeserializationContext deserializationContext) + throws IOException { + return Boolean.valueOf(jsonParser.getValueAsString()); + } + } + + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @JsonSerialize(using = BooleanSerializer.class) + @JsonDeserialize(using = BooleanDeserializer.class) + @JsonProperty(BASIC_STATS) + boolean basicStats; + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @JsonProperty(COLUMN_STATS) + @JsonSerialize(contentUsing = BooleanSerializer.class) + @JsonDeserialize(contentUsing = BooleanDeserializer.class) + TreeMap<String, Boolean> columnStats = new TreeMap<>(); + + } + + 
public static boolean areBasicStatsUptoDate(Map<String, String> params) { + if (params == null) { + return false; + } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + return stats.basicStats; + } + + public static boolean areColumnStatsUptoDate(Map<String, String> params, String colName) { + if (params == null) { + return false; + } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + return stats.columnStats.containsKey(colName); + } + + // It will only throw JSONException when stats.put(BASIC_STATS, TRUE) + // has duplicate key, which is not possible + // note that set basic stats false will wipe out column stats too. + public static void setBasicStatsState(Map<String, String> params, String setting) { + if (setting.equals(FALSE)) { + if (params!=null && params.containsKey(COLUMN_STATS_ACCURATE)) { + params.remove(COLUMN_STATS_ACCURATE); + } + return; + } + if (params == null) { + throw new RuntimeException("params are null...cant set columnstatstate!"); + } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + stats.basicStats = true; + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { + throw new RuntimeException("can't serialize column stats", e); + } + } + + public static void setColumnStatsState(Map<String, String> params, List<String> colNames) { + if (params == null) { + throw new RuntimeException("params are null...cant set columnstatstate!"); + } + if (colNames == null) { + return; + } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + + for (String colName : colNames) { + if (!stats.columnStats.containsKey(colName)) { + stats.columnStats.put(colName, true); + } + } + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { + LOG.trace(e.getMessage()); + } + } 
+ + public static boolean canColumnStatsMerge(Map<String, String> params, String colName) { + if (params == null) { + return false; + } + // TODO: should this also check that the basic flag is valid? + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + return stats.columnStats.containsKey(colName); + } + + public static void clearColumnStatsState(Map<String, String> params) { + if (params == null) { + return; + } + + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + stats.columnStats.clear(); + + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { + LOG.trace(e.getMessage()); + } + } + + public static void removeColumnStatsState(Map<String, String> params, List<String> colNames) { + if (params == null) { + return; + } + try { + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + for (String string : colNames) { + stats.columnStats.remove(string); + } + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { + LOG.trace(e.getMessage()); + } + } + + public static void setStatsStateForCreateTable(Map<String, String> params, + List<String> cols, String setting) { + if (TRUE.equals(setting)) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { + params.put(stat, "0"); + } + } + setBasicStatsState(params, setting); + if (TRUE.equals(setting)) { + setColumnStatsState(params, cols); + } + } + + private static ColumnStatsAccurate parseStatsAcc(String statsAcc) { + if (statsAcc == null) { + return new ColumnStatsAccurate(); + } + try { + return ColumnStatsAccurate.objectReader.readValue(statsAcc); + } catch (Exception e) { + ColumnStatsAccurate ret = new ColumnStatsAccurate(); + if (TRUE.equalsIgnoreCase(statsAcc)) { + ret.basicStats = true; + } + return ret; + } + } +} 
http://git-wip-us.apache.org/repos/asf/hive/blob/35f86c74/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java new file mode 100644 index 0000000..d5dea4d --- /dev/null +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hive.metastore.utils.StringUtils; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * Constants and utility functions for column types. This is explicitly done as constants in the + * class rather than an enum in order to interoperate with Hive's old serdeConstants. 
All type + * names in this class match the type names in Hive's serdeConstants class. They must continue + * to do so. + */ +@InterfaceAudience.Public +@InterfaceStability.Stable +public class ColumnType { + public static final String VOID_TYPE_NAME = "void"; + + public static final String BOOLEAN_TYPE_NAME = "boolean"; + + public static final String TINYINT_TYPE_NAME = "tinyint"; + + public static final String SMALLINT_TYPE_NAME = "smallint"; + + public static final String INT_TYPE_NAME = "int"; + + public static final String BIGINT_TYPE_NAME = "bigint"; + + public static final String FLOAT_TYPE_NAME = "float"; + + public static final String DOUBLE_TYPE_NAME = "double"; + + public static final String STRING_TYPE_NAME = "string"; + + public static final String CHAR_TYPE_NAME = "char"; + + public static final String VARCHAR_TYPE_NAME = "varchar"; + + public static final String DATE_TYPE_NAME = "date"; + + public static final String DATETIME_TYPE_NAME = "datetime"; + + public static final String TIMESTAMP_TYPE_NAME = "timestamp"; + + public static final String DECIMAL_TYPE_NAME = "decimal"; + + public static final String BINARY_TYPE_NAME = "binary"; + + public static final String INTERVAL_YEAR_MONTH_TYPE_NAME = "interval_year_month"; + + public static final String INTERVAL_DAY_TIME_TYPE_NAME = "interval_day_time"; + + public static final String TIMESTAMPTZ_TYPE_NAME = "timestamp with time zone"; + + public static final String LIST_TYPE_NAME = "array"; + + public static final String MAP_TYPE_NAME = "map"; + + public static final String STRUCT_TYPE_NAME = "struct"; + + public static final String UNION_TYPE_NAME = "uniontype"; + + public static final String LIST_COLUMNS = "columns"; + + public static final String LIST_COLUMN_TYPES = "columns.types"; + + public static final String COLUMN_NAME_DELIMITER = "column.name.delimiter"; + + public static final Set<String> PrimitiveTypes = StringUtils.asSet( + VOID_TYPE_NAME, + BOOLEAN_TYPE_NAME, + TINYINT_TYPE_NAME, + SMALLINT_TYPE_NAME, + 
INT_TYPE_NAME, + BIGINT_TYPE_NAME, + FLOAT_TYPE_NAME, + DOUBLE_TYPE_NAME, + STRING_TYPE_NAME, + VARCHAR_TYPE_NAME, + CHAR_TYPE_NAME, + DATE_TYPE_NAME, + DATETIME_TYPE_NAME, + TIMESTAMP_TYPE_NAME, + INTERVAL_YEAR_MONTH_TYPE_NAME, + INTERVAL_DAY_TIME_TYPE_NAME, + DECIMAL_TYPE_NAME, + BINARY_TYPE_NAME, + TIMESTAMPTZ_TYPE_NAME); + + public static final Set<String> StringTypes = StringUtils.asSet( + STRING_TYPE_NAME, + VARCHAR_TYPE_NAME, + CHAR_TYPE_NAME + ); + + public static final Set<String> NumericTypes = StringUtils.asSet( + TINYINT_TYPE_NAME, + SMALLINT_TYPE_NAME, + INT_TYPE_NAME, + BIGINT_TYPE_NAME, + FLOAT_TYPE_NAME, + DOUBLE_TYPE_NAME, + DECIMAL_TYPE_NAME + ); + + // This intentionally does not include interval types. + public static final Set<String> DateTimeTypes = StringUtils.asSet( + DATE_TYPE_NAME, + DATETIME_TYPE_NAME, + TIMESTAMP_TYPE_NAME, + TIMESTAMPTZ_TYPE_NAME + ); + + // This map defines the progression of up casts in numeric types. + public static final Map<String, Integer> NumericCastOrder = new HashMap<>(); + + static { + NumericCastOrder.put(TINYINT_TYPE_NAME, 1); + NumericCastOrder.put(SMALLINT_TYPE_NAME, 2); + NumericCastOrder.put(INT_TYPE_NAME, 3); + NumericCastOrder.put(BIGINT_TYPE_NAME, 4); + NumericCastOrder.put(DECIMAL_TYPE_NAME, 5); + NumericCastOrder.put(FLOAT_TYPE_NAME, 6); + NumericCastOrder.put(DOUBLE_TYPE_NAME, 7); + } + + private static final Map<String, String> alternateTypeNames = new HashMap<>(); + + static { + alternateTypeNames.put("integer", INT_TYPE_NAME); + alternateTypeNames.put("numeric", DECIMAL_TYPE_NAME); + } + + public static final Set<String> CollectionTypes = StringUtils.asSet( + LIST_TYPE_NAME, + MAP_TYPE_NAME); + + public static final Set<String> IntegralTypes = StringUtils.asSet( + TINYINT_TYPE_NAME, + SMALLINT_TYPE_NAME, + INT_TYPE_NAME, + BIGINT_TYPE_NAME); + + public static final Set<String> AllTypes = StringUtils.asSet( + VOID_TYPE_NAME, + BOOLEAN_TYPE_NAME, + TINYINT_TYPE_NAME, + SMALLINT_TYPE_NAME, + 
INT_TYPE_NAME, + BIGINT_TYPE_NAME, + FLOAT_TYPE_NAME, + DOUBLE_TYPE_NAME, + STRING_TYPE_NAME, + CHAR_TYPE_NAME, + VARCHAR_TYPE_NAME, + DATE_TYPE_NAME, + DATETIME_TYPE_NAME, + TIMESTAMP_TYPE_NAME, + DECIMAL_TYPE_NAME, + BINARY_TYPE_NAME, + INTERVAL_YEAR_MONTH_TYPE_NAME, + INTERVAL_DAY_TIME_TYPE_NAME, + TIMESTAMPTZ_TYPE_NAME, + LIST_TYPE_NAME, + MAP_TYPE_NAME, + STRUCT_TYPE_NAME, + UNION_TYPE_NAME, + LIST_COLUMNS, + LIST_COLUMN_TYPES, + COLUMN_NAME_DELIMITER + ); + + /** + * Given a type string return the type name. For example, passing in the type string + * <tt>varchar(256)</tt> will return <tt>varchar</tt>. + * @param typeString Type string + * @return type name, guaranteed to be in lower case + */ + public static String getTypeName(String typeString) { + if (typeString == null) return null; + String protoType = typeString.toLowerCase().split("\\W")[0]; + String realType = alternateTypeNames.get(protoType); + return realType == null ? protoType : realType; + } + + public static boolean areColTypesCompatible(String from, String to) { + if (from.equals(to)) return true; + + if (PrimitiveTypes.contains(from) && PrimitiveTypes.contains(to)) { + // They aren't the same, but we may be able to do a cast + + // If they are both types of strings, that should be fine + if (StringTypes.contains(from) && StringTypes.contains(to)) return true; + + // If both are numeric, make sure the new type is larger than the old. + if (NumericTypes.contains(from) && NumericTypes.contains(to)) { + return NumericCastOrder.get(from) < NumericCastOrder.get(to); + } + + // Allow string to double conversion + if (StringTypes.contains(from) && to.equals(DOUBLE_TYPE_NAME)) return true; + + // Void can go to anything + if (from.equals(VOID_TYPE_NAME)) return true; + + // Allow date to string casts. NOTE: I suspect this is the reverse of what we actually + // want, but it matches the code in o.a.h.h.serde2.typeinfo.TypeInfoUtils. 
I can't see how + // users would be altering date columns into string columns. The other I easily see since + // Hive did not originally support datetime types. Also, the comment in the Hive code + // says string to date, even though the code does the opposite. But for now I'm keeping + // this as is so the functionality matches. + if (DateTimeTypes.contains(from) && StringTypes.contains(to)) return true; + + // Allow numeric to string + if (NumericTypes.contains(from) && StringTypes.contains(to)) return true; + + } + return false; + } + + // These aren't column types, they are info for how things are stored in thrift. + // It didn't seem useful to create another Constants class just for these though. + public static final String SERIALIZATION_FORMAT = "serialization.format"; + + public static final String SERIALIZATION_LIB = "serialization.lib"; + + public static final String SERIALIZATION_DDL = "serialization.ddl"; + + public static final char COLUMN_COMMENTS_DELIMITER = '\0'; + + private static HashMap<String, String> typeToThriftTypeMap; + static { + typeToThriftTypeMap = new HashMap<>(); + typeToThriftTypeMap.put(BOOLEAN_TYPE_NAME, "bool"); + typeToThriftTypeMap.put(TINYINT_TYPE_NAME, "byte"); + typeToThriftTypeMap.put(SMALLINT_TYPE_NAME, "i16"); + typeToThriftTypeMap.put(INT_TYPE_NAME, "i32"); + typeToThriftTypeMap.put(BIGINT_TYPE_NAME, "i64"); + typeToThriftTypeMap.put(DOUBLE_TYPE_NAME, "double"); + typeToThriftTypeMap.put(FLOAT_TYPE_NAME, "float"); + typeToThriftTypeMap.put(LIST_TYPE_NAME, "list"); + typeToThriftTypeMap.put(MAP_TYPE_NAME, "map"); + typeToThriftTypeMap.put(STRING_TYPE_NAME, "string"); + typeToThriftTypeMap.put(BINARY_TYPE_NAME, "binary"); + // These 4 types are not supported yet. + // We should define a complex type date in thrift that contains a single int + // member, and DynamicSerDe + // should convert it to date type at runtime. 
+ typeToThriftTypeMap.put(DATE_TYPE_NAME, "date"); + typeToThriftTypeMap.put(DATETIME_TYPE_NAME, "datetime"); + typeToThriftTypeMap.put(TIMESTAMP_TYPE_NAME, "timestamp"); + typeToThriftTypeMap.put(DECIMAL_TYPE_NAME, "decimal"); + typeToThriftTypeMap.put(INTERVAL_YEAR_MONTH_TYPE_NAME, INTERVAL_YEAR_MONTH_TYPE_NAME); + typeToThriftTypeMap.put(INTERVAL_DAY_TIME_TYPE_NAME, INTERVAL_DAY_TIME_TYPE_NAME); + } + + /** + * Convert type to ThriftType. We do that by tokenizing the type and convert + * each token. + */ + public static String typeToThriftType(String type) { + StringBuilder thriftType = new StringBuilder(); + int last = 0; + boolean lastAlphaDigit = Character.isLetterOrDigit(type.charAt(last)); + for (int i = 1; i <= type.length(); i++) { + if (i == type.length() + || Character.isLetterOrDigit(type.charAt(i)) != lastAlphaDigit) { + String token = type.substring(last, i); + last = i; + String thriftToken = typeToThriftTypeMap.get(token); + thriftType.append(thriftToken == null ? token : thriftToken); + lastAlphaDigit = !lastAlphaDigit; + } + } + return thriftType.toString(); + } + + public static String getListType(String t) { + return "array<" + t + ">"; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/35f86c74/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileMetadataHandler.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileMetadataHandler.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileMetadataHandler.java new file mode 100644 index 0000000..ff30260 --- /dev/null +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileMetadataHandler.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.FileMetadataExprType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The base implementation of a file metadata handler for a specific file type. + * There are currently two classes for each file type (of 1), this one, which is very simple due + * to the fact that it just calls the proxy class for most calls; and the proxy class, that + * contains the actual implementation that depends on some stuff in QL (for ORC). + */ +public abstract class FileMetadataHandler { + protected static final Logger LOG = LoggerFactory.getLogger(FileMetadataHandler.class); + + private Configuration conf; + private PartitionExpressionProxy expressionProxy; + private FileFormatProxy fileFormatProxy; + private MetadataStore store; + + /** + * Same as RawStore.getFileMetadataByExpr. 
+ */ + public abstract void getFileMetadataByExpr(List<Long> fileIds, byte[] expr, + ByteBuffer[] metadatas, ByteBuffer[] results, boolean[] eliminated) throws IOException; + + protected abstract FileMetadataExprType getType(); + + protected PartitionExpressionProxy getExpressionProxy() { + return expressionProxy; + } + + protected FileFormatProxy getFileFormatProxy() { + return fileFormatProxy; + } + + protected MetadataStore getStore() { + return store; + } + + /** + * Configures the handler. Called once before use. + * @param conf Config. + * @param expressionProxy Expression proxy to access ql stuff. + * @param store Storage interface to manipulate the metadata. + */ + public void configure( + Configuration conf, PartitionExpressionProxy expressionProxy, MetadataStore store) { + this.conf = conf; + this.expressionProxy = expressionProxy; + this.store = store; + this.fileFormatProxy = expressionProxy.getFileFormatProxy(getType()); + } + + /** + * Caches the file metadata for a particular file. + * @param fileId File id. + * @param fs The filesystem of the file. + * @param path Path to the file. + */ + public void cacheFileMetadata(long fileId, FileSystem fs, Path path) + throws IOException, InterruptedException { + // ORC is in ql, so we cannot do anything here. For now, all the logic is in the proxy. + ByteBuffer[] cols = fileFormatProxy.getAddedColumnsToCache(); + ByteBuffer[] vals = (cols == null) ? null : new ByteBuffer[cols.length]; + ByteBuffer metadata = fileFormatProxy.getMetadataToCache(fs, path, vals); + LOG.info("Caching file metadata for " + path + ", size " + metadata.remaining()); + store.storeFileMetadata(fileId, metadata, cols, vals); + } + + /** + * @return the added column names to be cached in metastore with the metadata for this type. + */ + public ByteBuffer[] createAddedCols() { + return fileFormatProxy.getAddedColumnsToCache(); + } + + /** + * @return the values for the added columns returned by createAddedCols for respective metadatas. 
+ */ + public ByteBuffer[][] createAddedColVals(List<ByteBuffer> metadata) { + return fileFormatProxy.getAddedValuesToCache(metadata); + } +}
