http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java new file mode 100644 index 0000000..01f3385 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.merge; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; + +public class DecimalColumnStatsMerger extends ColumnStatsMerger { + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + DecimalColumnStatsDataInspector aggregateData = + (DecimalColumnStatsDataInspector) aggregateColStats.getStatsData().getDecimalStats(); + DecimalColumnStatsDataInspector newData = + (DecimalColumnStatsDataInspector) newColStats.getStatsData().getDecimalStats(); + Decimal lowValue = aggregateData.getLowValue() != null + && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData + .getLowValue() : newData.getLowValue(); + aggregateData.setLowValue(lowValue); + Decimal highValue = aggregateData.getHighValue() != null + && (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? 
aggregateData + .getHighValue() : newData.getHighValue(); + aggregateData.setHighValue(highValue); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); + aggregateData.setNumDVs(ndv); + } + } +}
http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java new file mode 100644 index 0000000..6a95751 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.merge; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; + +public class DoubleColumnStatsMerger extends ColumnStatsMerger { + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + DoubleColumnStatsDataInspector aggregateData = + (DoubleColumnStatsDataInspector) aggregateColStats.getStatsData().getDoubleStats(); + DoubleColumnStatsDataInspector newData = + (DoubleColumnStatsDataInspector) newColStats.getStatsData().getDoubleStats(); + aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); + aggregateData.setNumDVs(ndv); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java 
---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java new file mode 100644 index 0000000..ca1a912 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.merge; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; + +public class LongColumnStatsMerger extends ColumnStatsMerger { + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + LongColumnStatsDataInspector aggregateData = + (LongColumnStatsDataInspector) aggregateColStats.getStatsData().getLongStats(); + LongColumnStatsDataInspector newData = + (LongColumnStatsDataInspector) newColStats.getStatsData().getLongStats(); + aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); + aggregateData.setNumDVs(ndv); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java 
---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java new file mode 100644 index 0000000..d6b4478 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.merge; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; + +public class StringColumnStatsMerger extends ColumnStatsMerger { + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + StringColumnStatsDataInspector aggregateData = + (StringColumnStatsDataInspector) aggregateColStats.getStatsData().getStringStats(); + StringColumnStatsDataInspector newData = + (StringColumnStatsDataInspector) newColStats.getStatsData().getStringStats(); + aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); + aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); + aggregateData.setNumDVs(ndv); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java 
---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 5a6ef99..5933318 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -499,12 +499,6 @@ public class MetastoreConf { "A comma separated list of metrics reporters to start"), MULTITHREADED("javax.jdo.option.Multithreaded", "javax.jdo.option.Multithreaded", true, "Set this to true if multiple threads access metastore through JDO concurrently."), - ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("metastore.orm.retrieveMapNullsAsEmptyStrings", - "hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false, - "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " + - "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " + - "as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " + - "pruning is the correct behaviour"), MAX_OPEN_TXNS("metastore.max.open.txns", "hive.max.open.txns", 100000, "Maximum number of open transactions. 
If \n" + "current open transactions reach this limit, future open transaction requests will be \n" + @@ -512,6 +506,21 @@ public class MetastoreConf { NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", "javax.jdo.option.NonTransactionalRead", true, "Reads outside of transactions"), + NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES("metastore.notification.sequence.lock.max.retries", + "hive.notification.sequence.lock.max.retries", 5, + "Number of retries required to acquire a lock when getting the next notification sequential ID for entries " + + "in the NOTIFICATION_LOG table."), + NOTIFICATION_SEQUENCE_LOCK_RETRY_SLEEP_INTERVAL( + "metastore.notification.sequence.lock.retry.sleep.interval", + "hive.notification.sequence.lock.retry.sleep.interval", 500, TimeUnit.MILLISECONDS, + "Sleep interval between retries to acquire a notification lock as described part of property " + + NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES.name()), + ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("metastore.orm.retrieveMapNullsAsEmptyStrings", + "hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false, + "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " + + "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " + + "as nulls, so we should set this parameter if we wish to reverse that behaviour. 
For others, " + + "pruning is the correct behaviour"), PARTITION_NAME_WHITELIST_PATTERN("metastore.partition.name.whitelist.pattern", "hive.metastore.partition.name.whitelist.pattern", "", "Partition names will be checked against this regex pattern and rejected if not matched."), @@ -591,6 +600,8 @@ public class MetastoreConf { "Metastore SSL certificate truststore password."), STATS_AUTO_GATHER("metastore.stats.autogather", "hive.stats.autogather", true, "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), + STATS_FETCH_BITVECTOR("metastore.stats.fetch.bitvector", "hive.stats.fetch.bitvector", false, + "Whether we fetch bitvector when we compute ndv. Users can turn it off if they want to use old schema"), STATS_NDV_TUNER("metastore.stats.ndv.tuner", "hive.metastore.stats.ndv.tuner", 0.0, "Provides a tunable parameter between the lower bound and the higher bound of ndv for aggregate ndv across all the partitions. \n" + "The lower bound is equal to the maximum of ndv of all the partitions. The higher bound is equal to the sum of ndv of all the partitions.\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java new file mode 100644 index 0000000..d608e50 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java @@ -0,0 +1,605 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import org.antlr.runtime.ANTLRStringStream; +import org.antlr.runtime.CharStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.ColumnType; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Sets; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; + +/** + * The Class representing the filter as a binary tree. The tree has TreeNode's + * at intermediate level and the leaf level nodes are of type LeafNode. + */ +public class ExpressionTree { + /** The empty tree that can be returned for an empty filter. */ + public static final ExpressionTree EMPTY_TREE = new ExpressionTree(); + + /** The logical operations supported. */ + public enum LogicalOperator { + AND, + OR + } + + /** The operators supported. 
*/ + public enum Operator { + EQUALS ("=", "==", "="), + GREATERTHAN (">"), + LESSTHAN ("<"), + LESSTHANOREQUALTO ("<="), + GREATERTHANOREQUALTO (">="), + LIKE ("LIKE", "matches", "like"), + NOTEQUALS2 ("!=", "!=", "<>"), + NOTEQUALS ("<>", "!=", "<>"); + + private final String op; + private final String jdoOp; + private final String sqlOp; + + // private constructor + Operator(String op){ + this.op = op; + this.jdoOp = op; + this.sqlOp = op; + } + + Operator(String op, String jdoOp, String sqlOp){ + this.op = op; + this.jdoOp = jdoOp; + this.sqlOp = sqlOp; + } + + public String getOp() { + return op; + } + + public String getJdoOp() { + return jdoOp; + } + + public String getSqlOp() { + return sqlOp; + } + + public static Operator fromString(String inputOperator) { + for(Operator op : Operator.values()) { + if(op.getOp().equals(inputOperator)){ + return op; + } + } + + throw new Error("Invalid value " + inputOperator + + " for " + Operator.class.getSimpleName()); + } + + @Override + public String toString() { + return op; + } + + } + + /** + * Depth first traversal of ExpressionTree. + * The users should override the subset of methods to do their stuff. + */ + public static class TreeVisitor { + private void visit(TreeNode node) throws MetaException { + if (shouldStop()) return; + assert node != null && node.getLhs() != null && node.getRhs() != null; + beginTreeNode(node); + node.lhs.accept(this); + midTreeNode(node); + node.rhs.accept(this); + endTreeNode(node); + } + + protected void beginTreeNode(TreeNode node) throws MetaException {} + protected void midTreeNode(TreeNode node) throws MetaException {} + protected void endTreeNode(TreeNode node) throws MetaException {} + protected void visit(LeafNode node) throws MetaException {} + protected boolean shouldStop() { + return false; + } + } + + /** + * Helper class that wraps the stringbuilder used to build the filter over the tree, + * as well as error propagation in two modes - expect errors, i.e. 
filter might as well + * be unbuildable and that's not a failure condition; or don't expect errors, i.e. filter + * must be buildable. + */ + public static class FilterBuilder { + private final StringBuilder result = new StringBuilder(); + private String errorMessage = null; + private boolean expectNoErrors = false; + + public FilterBuilder(boolean expectNoErrors) { + this.expectNoErrors = expectNoErrors; + } + + public String getFilter() throws MetaException { + assert errorMessage == null; + if (errorMessage != null) { + throw new MetaException("Trying to get result after error: " + errorMessage); + } + return result.toString(); + } + + @Override + public String toString() { + try { + return getFilter(); + } catch (MetaException ex) { + throw new RuntimeException(ex); + } + } + + public String getErrorMessage() { + return errorMessage; + } + + public boolean hasError() { + return errorMessage != null; + } + + public FilterBuilder append(String filterPart) { + this.result.append(filterPart); + return this; + } + + public void setError(String errorMessage) throws MetaException { + this.errorMessage = errorMessage; + if (expectNoErrors) { + throw new MetaException(errorMessage); + } + } + } + + /** + * The Class representing a Node in the ExpressionTree. + */ + public static class TreeNode { + private TreeNode lhs; + private LogicalOperator andOr; + private TreeNode rhs; + + public TreeNode() { + } + + public TreeNode(TreeNode lhs, LogicalOperator andOr, TreeNode rhs) { + this.lhs = lhs; + this.andOr = andOr; + this.rhs = rhs; + } + + public TreeNode getLhs() { + return lhs; + } + + public LogicalOperator getAndOr() { + return andOr; + } + + public TreeNode getRhs() { + return rhs; + } + + /** Double dispatch for TreeVisitor. */ + protected void accept(TreeVisitor visitor) throws MetaException { + visitor.visit(this); + } + + /** + * Generates a JDO filter statement + * @param table + * The table on which the filter is applied. 
If table is not null, + * then this method generates a JDO statement to get all partitions + * of the table that match the filter. + * If table is null, then this method generates a JDO statement to get all + * tables that match the filter. + * @param params + * A map of parameter key to values for the filter statement. + * @param filterBuffer The filter builder that is used to build filter. + * @throws MetaException + */ + public void generateJDOFilter(Configuration conf, Table table, + Map<String, Object> params, FilterBuilder filterBuffer) throws MetaException { + if (filterBuffer.hasError()) return; + if (lhs != null) { + filterBuffer.append (" ("); + lhs.generateJDOFilter(conf, table, params, filterBuffer); + + if (rhs != null) { + if( andOr == LogicalOperator.AND ) { + filterBuffer.append(" && "); + } else { + filterBuffer.append(" || "); + } + + rhs.generateJDOFilter(conf, table, params, filterBuffer); + } + filterBuffer.append (") "); + } + } + } + + /** + * The Class representing the leaf level nodes in the ExpressionTree. + */ + public static class LeafNode extends TreeNode { + public String keyName; + public Operator operator; + /** Constant expression side of the operator. Can currently be a String or a Long. 
*/ + public Object value; + public boolean isReverseOrder = false; + private static final String PARAM_PREFIX = "hive_filter_param_"; + + @Override + protected void accept(TreeVisitor visitor) throws MetaException { + visitor.visit(this); + } + + @Override + public void generateJDOFilter(Configuration conf, Table table, Map<String, Object> params, + FilterBuilder filterBuilder) throws MetaException { + if (table != null) { + generateJDOFilterOverPartitions(conf, table, params, filterBuilder); + } else { + generateJDOFilterOverTables(params, filterBuilder); + } + } + + //can only support "=" and "!=" for now, because our JDO lib is buggy when + // using objects from map.get() + private static final Set<Operator> TABLE_FILTER_OPS = Sets.newHashSet( + Operator.EQUALS, Operator.NOTEQUALS, Operator.NOTEQUALS2, Operator.LIKE); + + private void generateJDOFilterOverTables(Map<String, Object> params, + FilterBuilder filterBuilder) throws MetaException { + if (keyName.equals(hive_metastoreConstants.HIVE_FILTER_FIELD_OWNER)) { + keyName = "this.owner"; + } else if (keyName.equals(hive_metastoreConstants.HIVE_FILTER_FIELD_LAST_ACCESS)) { + //lastAccessTime expects an integer, so we cannot use the "like operator" + if (operator == Operator.LIKE) { + filterBuilder.setError("Like is not supported for HIVE_FILTER_FIELD_LAST_ACCESS"); + return; + } + keyName = "this.lastAccessTime"; + } else if (keyName.startsWith(hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS)) { + if (!TABLE_FILTER_OPS.contains(operator)) { + filterBuilder.setError("Only " + TABLE_FILTER_OPS + " are supported " + + "operators for HIVE_FILTER_FIELD_PARAMS"); + return; + } + String paramKeyName = keyName.substring(hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS.length()); + keyName = "this.parameters.get(\"" + paramKeyName + "\")"; + //value is persisted as a string in the db, so make sure it's a string here + // in case we get a long. 
+ value = value.toString(); + } else { + filterBuilder.setError("Invalid key name in filter. " + + "Use constants from org.apache.hadoop.hive.metastore.api"); + return; + } + generateJDOFilterGeneral(params, filterBuilder); + } + + /** + * Generates a general filter. Given a map of <key, value>, + * generates a statement of the form: + * key1 operator value2 (&& | || ) key2 operator value2 ... + * + * Currently supported types for value are String and Long. + * The LIKE operator for Longs is unsupported. + */ + private void generateJDOFilterGeneral(Map<String, Object> params, + FilterBuilder filterBuilder) throws MetaException { + String paramName = PARAM_PREFIX + params.size(); + params.put(paramName, value); + + if (isReverseOrder) { + if (operator == Operator.LIKE) { + filterBuilder.setError("Value should be on the RHS for LIKE operator : " + + "Key <" + keyName + ">"); + } else { + filterBuilder.append(paramName + " " + operator.getJdoOp() + " " + keyName); + } + } else { + if (operator == Operator.LIKE) { + filterBuilder.append(" " + keyName + "." 
+ operator.getJdoOp() + "(" + paramName + ") "); + } else { + filterBuilder.append(" " + keyName + " " + operator.getJdoOp() + " " + paramName); + } + } + } + + private void generateJDOFilterOverPartitions(Configuration conf, Table table, + Map<String, Object> params, FilterBuilder filterBuilder) throws MetaException { + int partitionColumnCount = table.getPartitionKeys().size(); + int partitionColumnIndex = getPartColIndexForFilter(table, filterBuilder); + if (filterBuilder.hasError()) return; + + boolean canPushDownIntegral = + MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.INTEGER_JDO_PUSHDOWN); + String valueAsString = getJdoFilterPushdownParam( + table, partitionColumnIndex, filterBuilder, canPushDownIntegral); + if (filterBuilder.hasError()) return; + + String paramName = PARAM_PREFIX + params.size(); + params.put(paramName, valueAsString); + + boolean isOpEquals = operator == Operator.EQUALS; + if (isOpEquals || operator == Operator.NOTEQUALS || operator == Operator.NOTEQUALS2) { + makeFilterForEquals(keyName, valueAsString, paramName, params, + partitionColumnIndex, partitionColumnCount, isOpEquals, filterBuilder); + return; + } + //get the value for a partition key form MPartition.values (PARTITION_KEY_VALUES) + String valString = "values.get(" + partitionColumnIndex + ")"; + + if (operator == Operator.LIKE) { + if (isReverseOrder) { + // For LIKE, the value should be on the RHS. + filterBuilder.setError( + "Value should be on the RHS for LIKE operator : Key <" + keyName + ">"); + } + // TODO: in all likelihood, this won't actually work. Keep it for backward compat. + filterBuilder.append(" " + valString + "." + operator.getJdoOp() + "(" + paramName + ") "); + } else { + filterBuilder.append(isReverseOrder + ? paramName + " " + operator.getJdoOp() + " " + valString + : " " + valString + " " + operator.getJdoOp() + " " + paramName); + } + } + + /** + * @return true iff filter pushdown for this operator can be done for integral types. 
+ */ + public boolean canJdoUseStringsWithIntegral() { + return (operator == Operator.EQUALS) + || (operator == Operator.NOTEQUALS) + || (operator == Operator.NOTEQUALS2); + } + + /** + * Get partition column index in the table partition column list that + * corresponds to the key that is being filtered on by this tree node. + * @param table The table. + * @param filterBuilder filter builder used to report error, if any. + * @return The index. + */ + public int getPartColIndexForFilter( + Table table, FilterBuilder filterBuilder) throws MetaException { + int partitionColumnIndex; + assert (table.getPartitionKeys().size() > 0); + for (partitionColumnIndex = 0; partitionColumnIndex < table.getPartitionKeys().size(); + ++partitionColumnIndex) { + if (table.getPartitionKeys().get(partitionColumnIndex).getName(). + equalsIgnoreCase(keyName)) { + break; + } + } + if( partitionColumnIndex == table.getPartitionKeys().size()) { + filterBuilder.setError("Specified key <" + keyName + + "> is not a partitioning key for the table"); + return -1; + } + + return partitionColumnIndex; + } + + /** + * Validates and gets the query parameter for JDO filter pushdown based on the column + * and the constant stored in this node. + * @param table The table. + * @param partColIndex The index of the column to check. + * @param filterBuilder filter builder used to report error, if any. + * @return The parameter string. 
+ */ + private String getJdoFilterPushdownParam(Table table, int partColIndex, + FilterBuilder filterBuilder, boolean canPushDownIntegral) throws MetaException { + boolean isIntegralSupported = canPushDownIntegral && canJdoUseStringsWithIntegral(); + String colType = table.getPartitionKeys().get(partColIndex).getType(); + // Can only support partitions whose types are string, or maybe integers + if (!colType.equals(ColumnType.STRING_TYPE_NAME) + && (!isIntegralSupported || !ColumnType.IntegralTypes.contains(colType))) { + filterBuilder.setError("Filtering is supported only on partition keys of type " + + "string" + (isIntegralSupported ? ", or integral types" : "")); + return null; + } + + // There's no support for date cast in JDO. Let's convert it to string; the date + // columns have been excluded above, so it will either compare w/string or fail. + Object val = value; + if (value instanceof Date) { + val = MetaStoreUtils.PARTITION_DATE_FORMAT.get().format((Date)value); + } + boolean isStringValue = val instanceof String; + if (!isStringValue && (!isIntegralSupported || !(val instanceof Long))) { + filterBuilder.setError("Filtering is supported only on partition keys of type " + + "string" + (isIntegralSupported ? ", or integral types" : "")); + return null; + } + + return isStringValue ? (String)val : Long.toString((Long)val); + } + } + + public void accept(TreeVisitor treeVisitor) throws MetaException { + if (this.root != null) { + this.root.accept(treeVisitor); + } + } + + /** + * For equals and not-equals, we can make the JDO query much faster by filtering + * based on the partition name. 
For a condition like ds="2010-10-01", we can see + * if there are any partitions with a name that contains the substring "ds=2010-10-01/" + * False matches aren't possible since "=" is escaped for partition names + * and the trailing '/' ensures that we won't get a match with ds=2010-10-011 + * Note that filters on integral type equality also work correctly by virtue of + * comparing them as part of ds=1234 string. + * + * Two cases to keep in mind: Case with only one partition column (no '/'s) + * Case where the partition key column is at the end of the name. (no + * tailing '/') + * + * @param keyName name of the partition column e.g. ds. + * @param value The value to compare to. + * @param paramName name of the parameter to use for JDOQL. + * @param params a map from the parameter name to their values. + * @param keyPos The index of the requisite partition column in the list of such columns. + * @param keyCount Partition column count for the table. + * @param isEq whether the operator is equals, or not-equals. + * @param fltr Filter builder used to append the filter, or report errors. + */ + private static void makeFilterForEquals(String keyName, String value, String paramName, + Map<String, Object> params, int keyPos, int keyCount, boolean isEq, FilterBuilder fltr) + throws MetaException { + Map<String, String> partKeyToVal = new HashMap<>(); + partKeyToVal.put(keyName, value); + // If a partition has multiple partition keys, we make the assumption that + // makePartName with one key will return a substring of the name made + // with both all the keys. + String escapedNameFragment = Warehouse.makePartName(partKeyToVal, false); + if (keyCount == 1) { + // Case where this is no other partition columns + params.put(paramName, escapedNameFragment); + fltr.append("partitionName ").append(isEq ? "== " : "!= ").append(paramName); + } else if (keyPos + 1 == keyCount) { + // Case where the partition column is at the end of the name. 
There will + // be a leading '/' but no trailing '/' + params.put(paramName, "/" + escapedNameFragment); + fltr.append(isEq ? "" : "!").append("partitionName.endsWith(") + .append(paramName).append(")"); + } else if (keyPos == 0) { + // Case where the partition column is at the beginning of the name. There will + // be a trailing '/' but no leading '/' + params.put(paramName, escapedNameFragment + "/"); + fltr.append(isEq ? "" : "!").append("partitionName.startsWith(") + .append(paramName).append(")"); + } else { + // Case where the partition column is in the middle of the name. There will + // be a leading '/' and an trailing '/' + params.put(paramName, "/" + escapedNameFragment + "/"); + fltr.append("partitionName.indexOf(").append(paramName).append(")") + .append(isEq ? ">= 0" : "< 0"); + } + } + + /** + * The root node for the tree. + */ + private TreeNode root = null; + + /** + * The node stack used to keep track of the tree nodes during parsing. + */ + private final Stack<TreeNode> nodeStack = new Stack<>(); + + public TreeNode getRoot() { + return this.root; + } + + @VisibleForTesting + public void setRootForTest(TreeNode tn) { + this.root = tn; + } + + + /** + * Adds a intermediate node of either type(AND/OR). Pops last two nodes from + * the stack and sets them as children of the new node and pushes itself + * onto the stack. + * @param andOr the operator type + */ + public void addIntermediateNode(LogicalOperator andOr) { + + TreeNode rhs = nodeStack.pop(); + TreeNode lhs = nodeStack.pop(); + TreeNode newNode = new TreeNode(lhs, andOr, rhs); + nodeStack.push(newNode); + root = newNode; + } + + /** + * Adds a leaf node, pushes the new node onto the stack. 
+ * @param newNode the new node + */ + public void addLeafNode(LeafNode newNode) { + if( root == null ) { + root = newNode; + } + nodeStack.push(newNode); + } + + /** Generate the JDOQL filter for the given expression tree + * @param table the table being queried + * @param params the input map which is updated with the + * the parameterized values. Keys are the parameter names and values + * are the parameter values + * @param filterBuilder the filter builder to append to. + */ + public void generateJDOFilterFragment(Configuration conf, Table table, + Map<String, Object> params, FilterBuilder filterBuilder) throws MetaException { + if (root == null) { + return; + } + + filterBuilder.append(" && ( "); + root.generateJDOFilter(conf, table, params, filterBuilder); + filterBuilder.append(" )"); + } + + /** Case insensitive ANTLR string stream */ + public static class ANTLRNoCaseStringStream extends ANTLRStringStream { + public ANTLRNoCaseStringStream (String input) { + super(input); + } + + @Override + public int LA (int i) { + int returnChar = super.LA (i); + + if (returnChar == CharStream.EOF) { + return returnChar; + } + else if (returnChar == 0) { + return returnChar; + } + + return Character.toUpperCase ((char) returnChar); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/Filter.g ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/Filter.g b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/Filter.g new file mode 100644 index 0000000..81111a0 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/Filter.g @@ -0,0 +1,484 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +grammar Filter; + +options +{ + k=3; +} + + +// Package headers +@header { +package org.apache.hadoop.hive.metastore.parser; + +import org.apache.hadoop.hive.metastore.parser.ExpressionTree; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +} + +@lexer::header { +package org.apache.hadoop.hive.metastore.parser; + +import java.sql.Date; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +} + +@lexer::members { + public String errorMsg; + + private static final Pattern datePattern = Pattern.compile(".*(\\d\\d\\d\\d-\\d\\d-\\d\\d).*"); + private static final ThreadLocal<SimpleDateFormat> dateFormat = + new ThreadLocal<SimpleDateFormat>() { + @Override + protected SimpleDateFormat initialValue() { + SimpleDateFormat val = new SimpleDateFormat("yyyy-MM-dd"); + val.setLenient(false); // Without this, 2020-20-20 becomes 2021-08-20. 
+ return val; + }; + }; + + public static java.sql.Date ExtractDate (String input) { + Matcher m = datePattern.matcher(input); + if (!m.matches()) { + return null; + } + try { + return new java.sql.Date(dateFormat.get().parse(m.group(1)).getTime()); + } catch (ParseException pe) { + return null; + } + } + + @Override + public void emitErrorMessage(String msg) { + // save for caller to detect invalid filter + errorMsg = msg; + } +} + +@members { + public ExpressionTree tree = new ExpressionTree(); + + public static String TrimQuotes (String input) { + if (input.length () > 1) { + if ((input.charAt (0) == '"' && input.charAt (input.length () - 1) == '"') + || (input.charAt (0) == '\'' && input.charAt (input.length () - 1) == '\'')) { + return input.substring (1, input.length () - 1); + } + } + return input; + } +} + +@rulecatch{ + catch (RecognitionException e){ + throw e; + } +} + +//main rule +filter + : + orExpression + ; + +orExpression + : + andExpression (KW_OR andExpression { tree.addIntermediateNode(LogicalOperator.OR); } )* + ; + +andExpression + : + expression (KW_AND expression { tree.addIntermediateNode(LogicalOperator.AND); } )* + ; + +expression + : + LPAREN orExpression RPAREN + | + operatorExpression + ; + +operatorExpression + : + betweenExpression + | + inExpression + | + multiColInExpression + | + binOpExpression + ; + +binOpExpression +@init { + boolean isReverseOrder = false; + Object val = null; +} + : + ( + ( + (key = Identifier op = operator value = DateLiteral) + | + (value = DateLiteral op = operator key = Identifier) { isReverseOrder = true; } + ) { val = FilterLexer.ExtractDate(value.getText()); } + | + ( + (key = Identifier op = operator value = StringLiteral) + | + (value = StringLiteral op = operator key = Identifier) { isReverseOrder = true; } + ) { val = TrimQuotes(value.getText()); } + | + ( + (key = Identifier op = operator value = IntegralLiteral) + | + (value = IntegralLiteral op = operator key = Identifier) { isReverseOrder = 
true; } + ) { val = Long.parseLong(value.getText()); } + ) + { + LeafNode node = new LeafNode(); + node.keyName = key.getText(); + node.value = val; + node.operator = op; + node.isReverseOrder = isReverseOrder; + + tree.addLeafNode(node); + }; + +operator returns [Operator op] + : + t = (LESSTHAN | LESSTHANOREQUALTO | GREATERTHAN | GREATERTHANOREQUALTO | KW_LIKE | EQUAL | NOTEQUAL) + { + $op = Operator.fromString(t.getText().toUpperCase()); + }; + +betweenExpression +@init { + Object leftV = null; + Object rightV = null; + boolean isPositive = true; +} + : + ( + key = Identifier (KW_NOT { isPositive = false; } )? BETWEEN + ( + (left = DateLiteral KW_AND right = DateLiteral) { + leftV = FilterLexer.ExtractDate(left.getText()); + rightV = FilterLexer.ExtractDate(right.getText()); + } + | + (left = StringLiteral KW_AND right = StringLiteral) { leftV = TrimQuotes(left.getText()); + rightV = TrimQuotes(right.getText()); + } + | + (left = IntegralLiteral KW_AND right = IntegralLiteral) { leftV = Long.parseLong(left.getText()); + rightV = Long.parseLong(right.getText()); + } + ) + ) + { + LeafNode leftNode = new LeafNode(), rightNode = new LeafNode(); + leftNode.keyName = rightNode.keyName = key.getText(); + leftNode.value = leftV; + rightNode.value = rightV; + leftNode.operator = isPositive ? Operator.GREATERTHANOREQUALTO : Operator.LESSTHAN; + rightNode.operator = isPositive ? Operator.LESSTHANOREQUALTO : Operator.GREATERTHAN; + tree.addLeafNode(leftNode); + tree.addLeafNode(rightNode); + tree.addIntermediateNode(isPositive ? LogicalOperator.AND : LogicalOperator.OR); + }; + +inExpression +@init { + List constants = new ArrayList(); + Object constantV = null; + boolean isPositive = true; +} + : + ( + LPAREN key = Identifier RPAREN ( KW_NOT { isPositive = false; } )? 
IN LPAREN + ( + ( + constant = DateLiteral + { + constantV = FilterLexer.ExtractDate(constant.getText()); + constants.add(constantV); + } + ( + COMMA constant = DateLiteral + { + constantV = FilterLexer.ExtractDate(constant.getText()); + constants.add(constantV); + } + )* + ) + | + ( + constant = StringLiteral + { + constantV = TrimQuotes(constant.getText()); + constants.add(constantV); + } + ( + COMMA constant = StringLiteral + { + constantV = TrimQuotes(constant.getText()); + constants.add(constantV); + } + )* + ) + | + ( + constant = IntegralLiteral + { + constantV = Long.parseLong(constant.getText()); + constants.add(constantV); + } + ( + COMMA constant = IntegralLiteral + { + constantV = Long.parseLong(constant.getText()); + constants.add(constantV); + } + )* + ) + ) RPAREN + ) + { + for (int i = 0; i < constants.size(); i++) { + Object value = constants.get(i); + LeafNode leaf = new LeafNode(); + leaf.keyName = key.getText(); + leaf.value = value; + leaf.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + tree.addLeafNode(leaf); + if (i != 0) { + tree.addIntermediateNode(isPositive ? LogicalOperator.OR : LogicalOperator.AND); + } + } + }; + +multiColInExpression +@init { + List<String> keyNames = new ArrayList<String>(); + List constants = new ArrayList(); + List partialConstants; + String keyV = null; + Object constantV = null; + boolean isPositive = true; +} + : + ( + LPAREN + ( + KW_STRUCT LPAREN key = Identifier + { + keyV = key.getText(); + keyNames.add(keyV); + } + ( + COMMA key = Identifier + { + keyV = key.getText(); + keyNames.add(keyV); + } + )* RPAREN + ) RPAREN ( KW_NOT { isPositive = false; } )? 
IN LPAREN KW_CONST KW_STRUCT LPAREN + { + partialConstants = new ArrayList(); + } + ( + constant = DateLiteral + { + constantV = FilterLexer.ExtractDate(constant.getText()); + partialConstants.add(constantV); + } + | constant = StringLiteral + { + constantV = TrimQuotes(constant.getText()); + partialConstants.add(constantV); + } + | constant = IntegralLiteral + { + constantV = Long.parseLong(constant.getText()); + partialConstants.add(constantV); + } + ) + ( + COMMA + ( + constant = DateLiteral + { + constantV = FilterLexer.ExtractDate(constant.getText()); + partialConstants.add(constantV); + } + | constant = StringLiteral + { + constantV = TrimQuotes(constant.getText()); + partialConstants.add(constantV); + } + | constant = IntegralLiteral + { + constantV = Long.parseLong(constant.getText()); + partialConstants.add(constantV); + } + ) + )* + { + constants.add(partialConstants); + } + RPAREN + ( + COMMA KW_CONST KW_STRUCT LPAREN + { + partialConstants = new ArrayList(); + } + ( + constant = DateLiteral + { + constantV = FilterLexer.ExtractDate(constant.getText()); + partialConstants.add(constantV); + } + | constant = StringLiteral + { + constantV = TrimQuotes(constant.getText()); + partialConstants.add(constantV); + } + | constant = IntegralLiteral + { + constantV = Long.parseLong(constant.getText()); + partialConstants.add(constantV); + } + ) + ( + COMMA + ( + constant = DateLiteral + { + constantV = FilterLexer.ExtractDate(constant.getText()); + partialConstants.add(constantV); + } + | constant = StringLiteral + { + constantV = TrimQuotes(constant.getText()); + partialConstants.add(constantV); + } + | constant = IntegralLiteral + { + constantV = Long.parseLong(constant.getText()); + partialConstants.add(constantV); + } + ) + )* + { + constants.add(partialConstants); + } + RPAREN + )* RPAREN + ) + { + for (int i = 0; i < constants.size(); i++) { + List list = (List) constants.get(i); + assert keyNames.size() == list.size(); + for (int j=0; j < list.size(); j++) { 
+ String keyName = keyNames.get(j); + Object value = list.get(j); + LeafNode leaf = new LeafNode(); + leaf.keyName = keyName; + leaf.value = value; + leaf.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + tree.addLeafNode(leaf); + if (j != 0) { + tree.addIntermediateNode(isPositive ? LogicalOperator.AND : LogicalOperator.OR); + } + } + if (i != 0) { + tree.addIntermediateNode(isPositive ? LogicalOperator.OR : LogicalOperator.AND); + } + } + }; + +// Keywords +KW_NOT : 'NOT'; +KW_AND : 'AND'; +KW_OR : 'OR'; +KW_LIKE : 'LIKE'; +KW_DATE : 'date'; +KW_CONST : 'CONST'; +KW_STRUCT : 'STRUCT'; + +// Operators +LPAREN : '(' ; +RPAREN : ')' ; +COMMA : ',' ; +EQUAL : '='; +NOTEQUAL : '<>' | '!='; +LESSTHANOREQUALTO : '<='; +LESSTHAN : '<'; +GREATERTHANOREQUALTO : '>='; +GREATERTHAN : '>'; +BETWEEN : 'BETWEEN'; +IN : 'IN'; + +// LITERALS +fragment +Letter + : 'a'..'z' | 'A'..'Z' + ; + +fragment +Digit + : + '0'..'9' + ; + +fragment DateString + : + (Digit)(Digit)(Digit)(Digit) '-' (Digit)(Digit) '-' (Digit)(Digit) + ; + +/* When I figure out how to make lexer backtrack after validating predicate, dates would be able +to support single quotes [( '\'' DateString '\'' ) |]. For now, what we do instead is have a hack +to parse the string in metastore code from StringLiteral. */ +DateLiteral + : + KW_DATE? DateString { ExtractDate(getText()) != null }? + ; + +StringLiteral + : + ( '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '\"' ( ~('\"'|'\\') | ('\\' .) )* '\"' + ) + ; + +IntegralLiteral + : + ('-')? 
(Digit)+ + ; + +Identifier + : + (Letter | Digit) (Letter | Digit | '_')* + ; + +WS : (' '|'\r'|'\t'|'\n')+ { skip(); } ; http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/package-info.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/package-info.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/package-info.java new file mode 100644 index 0000000..29f1f0b --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/parser/package-info.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package implements the parser for parsing the string filter + * for the listPartitionsByFilter API. 
+ */ +package org.apache.hadoop.hive.metastore.parser; http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/AcidOpenTxnsCounterService.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/AcidOpenTxnsCounterService.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/AcidOpenTxnsCounterService.java index 1223b52..e3f7eca 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/AcidOpenTxnsCounterService.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/AcidOpenTxnsCounterService.java @@ -31,13 +31,13 @@ public class AcidOpenTxnsCounterService implements RunnableConfigurable { private Configuration conf; private int isAliveCounter = 0; private long lastLogTime = 0; + private TxnStore txnHandler; @Override public void run() { try { long startTime = System.currentTimeMillis(); isAliveCounter++; - TxnStore txnHandler = TxnUtils.getTxnStore(conf); txnHandler.countOpenTxns(); if (System.currentTimeMillis() - lastLogTime > 60 * 1000) { LOG.info("AcidOpenTxnsCounterService ran for " + @@ -54,6 +54,7 @@ public class AcidOpenTxnsCounterService implements RunnableConfigurable { @Override public void setConf(Configuration configuration) { conf = configuration; + txnHandler = TxnUtils.getTxnStore(conf); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/JavaUtils.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/JavaUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/JavaUtils.java index 40f7393..593dee3 100644 --- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/JavaUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/JavaUtils.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.lang.reflect.Constructor; import java.net.InetAddress; import java.net.UnknownHostException; @@ -55,6 +56,38 @@ public class JavaUtils { } /** + * Create an object of the given class. + * @param theClass + * @param parameterTypes + * an array of parameterTypes for the constructor + * @param initargs + * the list of arguments for the constructor + */ + public static <T> T newInstance(Class<T> theClass, Class<?>[] parameterTypes, + Object[] initargs) { + // Perform some sanity checks on the arguments. + if (parameterTypes.length != initargs.length) { + throw new IllegalArgumentException( + "Number of constructor parameter types doesn't match number of arguments"); + } + for (int i = 0; i < parameterTypes.length; i++) { + Class<?> clazz = parameterTypes[i]; + if (initargs[i] != null && !(clazz.isInstance(initargs[i]))) { + throw new IllegalArgumentException("Object : " + initargs[i] + + " is not an instance of " + clazz); + } + } + + try { + Constructor<T> meth = theClass.getDeclaredConstructor(parameterTypes); + meth.setAccessible(true); + return meth.newInstance(initargs); + } catch (Exception e) { + throw new RuntimeException("Unable to instantiate " + theClass.getName(), e); + } + } + + /** * @return name of current host */ public static String hostname() { http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 37fc56b..77790ad 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -17,11 +17,63 @@ */ package org.apache.hadoop.hive.metastore.utils; +import com.google.common.base.Predicates; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.commons.lang.*; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SkewedInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import 
java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.regex.Pattern; + public class MetaStoreUtils { + /** A fixed date format to be used for hive partition column values. */ + public static final ThreadLocal<DateFormat> PARTITION_DATE_FORMAT = + new ThreadLocal<DateFormat>() { + @Override + protected DateFormat initialValue() { + DateFormat val = new SimpleDateFormat("yyyy-MM-dd"); + val.setLenient(false); // Without this, 2020-20-20 becomes 2021-08-20. + return val; + } + }; + private static final Charset ENCODING = StandardCharsets.UTF_8; private static final Logger LOG = LoggerFactory.getLogger(MetaStoreUtils.class); /** @@ -53,4 +105,258 @@ public class MetaStoreUtils { } return sb.toString(); } + + /** + * convert Exception to MetaException, which sets the cause to such exception + * @param e cause of the exception + * @return the MetaException with the specified exception as the cause + */ + public static MetaException newMetaException(Exception e) { + return newMetaException(e != null ? e.getMessage() : null, e); + } + + /** + * convert Exception to MetaException, which sets the cause to such exception + * @param errorMessage the error message for this MetaException + * @param e cause of the exception + * @return the MetaException with the specified exception as the cause + */ + public static MetaException newMetaException(String errorMessage, Exception e) { + MetaException metaException = new MetaException(errorMessage); + if (e != null) { + metaException.initCause(e); + } + return metaException; + } + + /** + * Helper function to transform Nulls to empty strings. 
+ */ + private static final com.google.common.base.Function<String,String> transFormNullsToEmptyString + = new com.google.common.base.Function<String, String>() { + @Override + public java.lang.String apply(@Nullable java.lang.String string) { + return org.apache.commons.lang.StringUtils.defaultString(string); + } + }; + /** + * We have aneed to sanity-check the map before conversion from persisted objects to + * metadata thrift objects because null values in maps will cause a NPE if we send + * across thrift. Pruning is appropriate for most cases except for databases such as + * Oracle where Empty strings are stored as nulls, in which case we need to handle that. + * See HIVE-8485 for motivations for this. + */ + public static Map<String,String> trimMapNulls( + Map<String,String> dnMap, boolean retrieveMapNullsAsEmptyStrings){ + if (dnMap == null){ + return null; + } + // Must be deterministic order map - see HIVE-8707 + // => we use Maps.newLinkedHashMap instead of Maps.newHashMap + if (retrieveMapNullsAsEmptyStrings) { + // convert any nulls present in map values to empty strings - this is done in the case + // of backing dbs like oracle which persist empty strings as nulls. + return Maps.newLinkedHashMap(Maps.transformValues(dnMap, transFormNullsToEmptyString)); + } else { + // prune any nulls present in map values - this is the typical case. + return Maps.newLinkedHashMap(Maps.filterValues(dnMap, Predicates.notNull())); + } + } + + + // given a list of partStats, this function will give you an aggr stats + public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats, + String dbName, String tableName, List<String> partNames, List<String> colNames, + boolean useDensityFunctionForNDVEstimation, double ndvTuner) + throws MetaException { + // 1. 
group by the stats by colNames + // map the colName to List<ColumnStatistics> + Map<String, List<ColumnStatistics>> map = new HashMap<>(); + for (ColumnStatistics css : partStats) { + List<ColumnStatisticsObj> objs = css.getStatsObj(); + for (ColumnStatisticsObj obj : objs) { + List<ColumnStatisticsObj> singleObj = new ArrayList<>(); + singleObj.add(obj); + ColumnStatistics singleCS = new ColumnStatistics(css.getStatsDesc(), singleObj); + if (!map.containsKey(obj.getColName())) { + map.put(obj.getColName(), new ArrayList<ColumnStatistics>()); + } + map.get(obj.getColName()).add(singleCS); + } + } + return MetaStoreUtils.aggrPartitionStats(map,dbName,tableName,partNames,colNames,useDensityFunctionForNDVEstimation, ndvTuner); + } + + public static List<ColumnStatisticsObj> aggrPartitionStats( + Map<String, List<ColumnStatistics>> map, String dbName, String tableName, + final List<String> partNames, List<String> colNames, + final boolean useDensityFunctionForNDVEstimation,final double ndvTuner) throws MetaException { + List<ColumnStatisticsObj> colStats = new ArrayList<>(); + // 2. 
Aggregate stats for each column in a separate thread + if (map.size()< 1) { + //stats are absent in RDBMS + LOG.debug("No stats data found for: dbName=" +dbName +" tblName=" + tableName + + " partNames= " + partNames + " colNames=" + colNames ); + return colStats; + } + final ExecutorService pool = Executors.newFixedThreadPool(Math.min(map.size(), 16), + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("aggr-col-stats-%d").build()); + final List<Future<ColumnStatisticsObj>> futures = Lists.newLinkedList(); + + long start = System.currentTimeMillis(); + for (final Map.Entry<String, List<ColumnStatistics>> entry : map.entrySet()) { + futures.add(pool.submit(new Callable<ColumnStatisticsObj>() { + @Override + public ColumnStatisticsObj call() throws Exception { + List<ColumnStatistics> css = entry.getValue(); + ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css + .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(), + useDensityFunctionForNDVEstimation, ndvTuner); + ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css); + return statsObj; + }})); + } + pool.shutdown(); + for (Future<ColumnStatisticsObj> future : futures) { + try { + colStats.add(future.get()); + } catch (InterruptedException | ExecutionException e) { + pool.shutdownNow(); + LOG.debug(e.toString()); + throw new MetaException(e.toString()); + } + } + LOG.debug("Time for aggr col stats in seconds: {} Threads used: {}", + ((System.currentTimeMillis() - (double)start))/1000, Math.min(map.size(), 16)); + return colStats; + } + + public static double decimalToDouble(Decimal decimal) { + return new BigDecimal(new BigInteger(decimal.getUnscaled()), decimal.getScale()).doubleValue(); + } + + public static String[] getQualifiedName(String defaultDbName, String tableName) { + String[] names = tableName.split("\\."); + if (names.length == 1) { + return new String[] { defaultDbName, tableName}; + } + return 
names; + } + + public static void validatePartitionNameCharacters(List<String> partVals, + Pattern partitionValidationPattern) throws MetaException { + + String invalidPartitionVal = getPartitionValWithInvalidCharacter(partVals, partitionValidationPattern); + if (invalidPartitionVal != null) { + throw new MetaException("Partition value '" + invalidPartitionVal + + "' contains a character " + "not matched by whitelist pattern '" + + partitionValidationPattern.toString() + "'. " + "(configure with " + + MetastoreConf.ConfVars.PARTITION_NAME_WHITELIST_PATTERN.varname + ")"); + } + } + + public static String getPartitionValWithInvalidCharacter(List<String> partVals, + Pattern partitionValidationPattern) { + if (partitionValidationPattern == null) { + return null; + } + + for (String partVal : partVals) { + if (!partitionValidationPattern.matcher(partVal).matches()) { + return partVal; + } + } + + return null; + } + + /** + * Produce a hash for the storage descriptor + * @param sd storage descriptor to hash + * @param md message descriptor to use to generate the hash + * @return the hash as a byte array + */ + public static byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) { + // Note all maps and lists have to be absolutely sorted. Otherwise we'll produce different + // results for hashes based on the OS or JVM being used. + md.reset(); + for (FieldSchema fs : sd.getCols()) { + md.update(fs.getName().getBytes(ENCODING)); + md.update(fs.getType().getBytes(ENCODING)); + if (fs.getComment() != null) md.update(fs.getComment().getBytes(ENCODING)); + } + if (sd.getInputFormat() != null) { + md.update(sd.getInputFormat().getBytes(ENCODING)); + } + if (sd.getOutputFormat() != null) { + md.update(sd.getOutputFormat().getBytes(ENCODING)); + } + md.update(sd.isCompressed() ? 
"true".getBytes(ENCODING) : "false".getBytes(ENCODING)); + md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING)); + if (sd.getSerdeInfo() != null) { + SerDeInfo serde = sd.getSerdeInfo(); + if (serde.getName() != null) { + md.update(serde.getName().getBytes(ENCODING)); + } + if (serde.getSerializationLib() != null) { + md.update(serde.getSerializationLib().getBytes(ENCODING)); + } + if (serde.getParameters() != null) { + SortedMap<String, String> params = new TreeMap<>(serde.getParameters()); + for (Map.Entry<String, String> param : params.entrySet()) { + md.update(param.getKey().getBytes(ENCODING)); + md.update(param.getValue().getBytes(ENCODING)); + } + } + } + if (sd.getBucketCols() != null) { + List<String> bucketCols = new ArrayList<>(sd.getBucketCols()); + for (String bucket : bucketCols) md.update(bucket.getBytes(ENCODING)); + } + if (sd.getSortCols() != null) { + SortedSet<Order> orders = new TreeSet<>(sd.getSortCols()); + for (Order order : orders) { + md.update(order.getCol().getBytes(ENCODING)); + md.update(Integer.toString(order.getOrder()).getBytes(ENCODING)); + } + } + if (sd.getSkewedInfo() != null) { + SkewedInfo skewed = sd.getSkewedInfo(); + if (skewed.getSkewedColNames() != null) { + SortedSet<String> colnames = new TreeSet<>(skewed.getSkewedColNames()); + for (String colname : colnames) md.update(colname.getBytes(ENCODING)); + } + if (skewed.getSkewedColValues() != null) { + SortedSet<String> sortedOuterList = new TreeSet<>(); + for (List<String> innerList : skewed.getSkewedColValues()) { + SortedSet<String> sortedInnerList = new TreeSet<>(innerList); + sortedOuterList.add(org.apache.commons.lang.StringUtils.join(sortedInnerList, ".")); + } + for (String colval : sortedOuterList) md.update(colval.getBytes(ENCODING)); + } + if (skewed.getSkewedColValueLocationMaps() != null) { + SortedMap<String, String> sortedMap = new TreeMap<>(); + for (Map.Entry<List<String>, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) { + 
SortedSet<String> sortedKey = new TreeSet<>(smap.getKey()); + sortedMap.put(org.apache.commons.lang.StringUtils.join(sortedKey, "."), smap.getValue()); + } + for (Map.Entry<String, String> e : sortedMap.entrySet()) { + md.update(e.getKey().getBytes(ENCODING)); + md.update(e.getValue().getBytes(ENCODING)); + } + } + md.update(sd.isStoredAsSubDirectories() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING)); + } + + return md.digest(); + } + + public static List<String> getColumnNamesForTable(Table table) { + List<String> colNames = new ArrayList<>(); + Iterator<FieldSchema> colsIterator = table.getSd().getColsIterator(); + while (colsIterator.hasNext()) { + colNames.add(colsIterator.next().getName()); + } + return colNames; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/ObjectPair.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/ObjectPair.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/ObjectPair.java new file mode 100644 index 0000000..5b49a25 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/ObjectPair.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
/**
 * A simple mutable pair of values. Not thread-safe.
 *
 * <p>Fix over the previous version: {@code equals} and {@code hashCode} now
 * tolerate null elements, which are reachable through the no-arg constructor
 * and the setters; previously both threw {@link NullPointerException}.
 * Hash values for non-null elements are unchanged.
 */
public class ObjectPair<F, S> {
  private F first;
  private S second;

  /** Creates an empty pair; both elements start as null. */
  public ObjectPair() {}

  /**
   * Creates a pair. Constructor doesn't infer template args but
   * the method does, so the code becomes less ugly.
   */
  public static <T1, T2> ObjectPair<T1, T2> create(T1 f, T2 s) {
    return new ObjectPair<>(f, s);
  }

  public ObjectPair(F first, S second) {
    this.first = first;
    this.second = second;
  }

  public F getFirst() {
    return first;
  }

  public void setFirst(F first) {
    this.first = first;
  }

  public S getSecond() {
    return second;
  }

  public void setSecond(S second) {
    this.second = second;
  }

  @Override
  public boolean equals(Object that) {
    if (this == that) {
      return true;
    }
    if (!(that instanceof ObjectPair)) {
      return false;
    }
    // Wildcard avoids the unchecked cast the old code performed.
    ObjectPair<?, ?> other = (ObjectPair<?, ?>) that;
    return elementsEqual(first, other.first) && elementsEqual(second, other.second);
  }

  /** Typed overload kept for source compatibility with existing callers. */
  public boolean equals(ObjectPair<F, S> that) {
    return equals((Object) that);
  }

  @Override
  public int hashCode() {
    // Same formula as before for non-null elements; null hashes as 0
    // instead of throwing NPE.
    return (first == null ? 0 : first.hashCode()) * 31
        + (second == null ? 0 : second.hashCode());
  }

  @Override
  public String toString() {
    return first + ":" + second;
  }

  /** Null-safe element comparison (equivalent to java.util.Objects.equals). */
  private static boolean elementsEqual(Object a, Object b) {
    return a == b || (a != null && a.equals(b));
  }
}
a/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/TestStatsSetupConst.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/TestStatsSetupConst.java new file mode 100644 index 0000000..883e2bd --- /dev/null +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/TestStatsSetupConst.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.hadoop.hive.common;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import java.util.HashMap;
import java.util.Map;

import org.junit.Test;

import com.google.common.collect.Lists;

/**
 * Tests pinning the exact JSON persisted in the COLUMN_STATS_ACCURATE
 * table/partition parameter by StatsSetupConst, including compatibility
 * quirks of earlier implementations. (Fix: removed a leftover debug
 * System.out.println from testColumnEntries_orderIndependence2.)
 */
public class TestStatsSetupConst {

  /** A legacy plain "FALSE" value must be upgraded to the JSON form. */
  @Test
  public void testSetBasicStatsState_missesUpgrade() {
    Map<String, String> params = new HashMap<>();
    params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, "FALSE");
    StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
    assertEquals("{\"BASIC_STATS\":\"true\"}", params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
  }

  /** Column-name case is preserved and setting the same column twice is idempotent. */
  @Test
  public void setColumnStatsState_camelcase() {
    Map<String, String> params = new HashMap<>();
    StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
    String val1 = params.get(StatsSetupConst.COLUMN_STATS_ACCURATE);
    StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
    String val2 = params.get(StatsSetupConst.COLUMN_STATS_ACCURATE);
    assertEquals(val1, val2);
  }

  /** Setting basic stats on an empty param map produces the JSON form. */
  @Test
  public void testSetBasicStatsState_none() {
    Map<String, String> params = new HashMap<>();
    StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
    assertEquals("{\"BASIC_STATS\":\"true\"}", params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
  }

  /** Setting basic stats to false removes the parameter entirely. */
  @Test
  public void testSetBasicStatsState_falseIsAbsent() {
    Map<String, String> params = new HashMap<>();
    StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
    StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
    assertNull(params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
  }

  // earlier implementation have quoted boolean values...so the new implementation should preserve this
  @Test
  public void testStatColumnEntriesCompat() {
    Map<String, String> params0 = new HashMap<>();
    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo"));

    assertEquals("{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"Foo\":\"true\"}}",
        params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
  }

  /** The serialized JSON must not depend on the order columns were added. */
  @Test
  public void testColumnEntries_orderIndependence() {
    Map<String, String> params0 = new HashMap<>();
    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo", "Bar"));
    Map<String, String> params1 = new HashMap<>();
    StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("Bar", "Foo"));
    StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));

    assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),
        params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
  }

  /** Order independence must also hold across incremental updates. */
  @Test
  public void testColumnEntries_orderIndependence2() {
    Map<String, String> params0 = new HashMap<>();
    // in case jackson is able to deserialize...it may use a different implementation for the map - which may not preserve order
    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year"));
    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year", "month"));
    Map<String, String> params1 = new HashMap<>();
    StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("month", "year"));
    StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));

    assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),
        params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
  }

  // FIXME: current objective is to keep the previous outputs...but this is possibly bad..
  @Test
  public void testColumnEntries_areKept_whenBasicIsAbsent() {
    Map<String, String> params = new HashMap<>();
    StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
    StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
    assertEquals("{\"COLUMN_STATS\":{\"Foo\":\"true\"}}",
        params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
  }
}
package org.apache.hadoop.hive.common.ndv.fm;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertArrayEquals;

import java.io.IOException;

import javolution.util.FastBitSet;

import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
import org.junit.Test;

/**
 * Round-trip serialization test for FMSketch: deserializes a textual bit
 * vector dump, serializes the sketch, reloads it through the factory and
 * checks the estimate and bytes survive the round trip.
 *
 * <p>Fixes over the previous version: the no-op statement
 * {@code sketch.equals(newSketch);} (its result was discarded) is now an
 * actual assertion; number parsing uses StringBuilder instead of quadratic
 * String concatenation; assertEquals arguments follow the JUnit
 * (expected, actual) convention.
 */
public class TestFMSketchSerialization {

  /**
   * Rebuilds bit vectors from concatenated FastBitSet.toString() output of the
   * form "{i, j, ...}{k, ...}" — one brace-delimited group per vector.
   *
   * @param s serialized bit vectors
   * @param numBitVectors number of vectors encoded in s
   * @return the reconstructed FastBitSet array
   */
  private FastBitSet[] deserialize(String s, int numBitVectors) {
    FastBitSet[] vectors = new FastBitSet[numBitVectors];
    for (int j = 0; j < numBitVectors; j++) {
      vectors[j] = new FastBitSet(FMSketch.BIT_VECTOR_SIZE);
      vectors[j].clear();
    }

    int vectorIndex = 0;

    /*
     * Parse input string to obtain the indexes that are set in the bitvector.
     * A '}' moves on to the next bit vector; "{", ",", and " " are skipped.
     * The leading '{' and trailing '}' are excluded by the loop bounds.
     */
    for (int i = 1; i < s.length() - 1;) {
      char c = s.charAt(i);
      i = i + 1;

      // Move on to the next bit vector
      if (c == '}') {
        vectorIndex = vectorIndex + 1;
      }

      // Encountered a numeric value; extract out the entire number.
      if (c >= '0' && c <= '9') {
        StringBuilder num = new StringBuilder();
        num.append(c);
        c = s.charAt(i);
        i = i + 1;

        while (c != ',' && c != '}') {
          num.append(c);
          c = s.charAt(i);
          i = i + 1;
        }

        int bitIndex = Integer.parseInt(num.toString());
        assert (bitIndex >= 0);
        assert (vectorIndex < numBitVectors);
        vectors[vectorIndex].set(bitIndex);
        if (c == '}') {
          vectorIndex = vectorIndex + 1;
        }
      }
    }
    return vectors;
  }

  @Test
  public void testSerDe() throws IOException {
    String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
    FastBitSet[] fastBitSet = deserialize(bitVectors, 16);
    FMSketch sketch = new FMSketch(16);
    for (int i = 0; i < 16; i++) {
      sketch.setBitVector(fastBitSet[i], i);
    }
    assertEquals(3, sketch.estimateNumDistinctValues());
    byte[] buf = sketch.serialize();
    FMSketch newSketch = (FMSketch) NumDistinctValueEstimatorFactory
        .getNumDistinctValueEstimator(buf);
    // Was "sketch.equals(newSketch);" with the result thrown away — assert it.
    assertEquals(sketch, newSketch);
    assertEquals(3, newSketch.estimateNumDistinctValues());
    assertArrayEquals(buf, newSketch.serialize());
  }

}
