Repository: hive
Updated Branches:
  refs/heads/master 5eebbdf7c -> 7b9540e48


HIVE-20490: UDAF: Add an 'approx_distinct' to Hive (Gopal V, reviewed by 
Gunther Hagleitner, Nishant Bangarwa)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b9540e4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b9540e4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b9540e4

Branch: refs/heads/master
Commit: 7b9540e48be1d896c8229564e49f5803bbca6a27
Parents: 5eebbdf
Author: Gopal V <[email protected]>
Authored: Sun Oct 21 13:44:33 2018 -0700
Committer: Gopal V <[email protected]>
Committed: Sun Oct 21 13:44:33 2018 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   3 +-
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |   2 +
 .../generic/GenericUDAFApproximateDistinct.java | 237 +++++++++
 .../queries/clientpositive/approx_distinct.q    |  76 +++
 .../clientpositive/llap/approx_distinct.q.out   | 490 +++++++++++++++++++
 .../results/clientpositive/show_functions.q.out |   1 +
 6 files changed, 808 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7b9540e4/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 8349e3d..ff9f758 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -938,7 +938,8 @@ minillaplocal.query.files=\
   partialdhj.q,\
   stats_date.q,\
   dst.q,\
-  q93_with_constraints.q
+  q93_with_constraints.q,\
+  approx_distinct.q
 
 encrypted.query.files=encryption_join_unencrypted_tbl.q,\
   encryption_insert_partition_static.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/7b9540e4/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 00e97a0..578b16c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime;
 import org.apache.hadoop.hive.ql.udf.UDFHex;
 import org.apache.hadoop.hive.ql.udf.UDFHour;
 import org.apache.hadoop.hive.ql.udf.UDFJson;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFApproximateDistinct;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLength;
 import org.apache.hadoop.hive.ql.udf.UDFLike;
 import org.apache.hadoop.hive.ql.udf.UDFLn;
@@ -465,6 +466,7 @@ public final class FunctionRegistry {
 
     system.registerGenericUDAF("compute_stats", new GenericUDAFComputeStats());
     system.registerGenericUDAF("bloom_filter", new GenericUDAFBloomFilter());
+    system.registerGenericUDAF("approx_distinct", new 
GenericUDAFApproximateDistinct());
     system.registerUDAF("percentile", UDAFPercentile.class);
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b9540e4/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFApproximateDistinct.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFApproximateDistinct.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFApproximateDistinct.java
new file mode 100644
index 0000000..1e8fc8a
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFApproximateDistinct.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.HiveBaseChar;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggregationBuffer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+
+@Description(name = "approx_distinct", value = "_FUNC_(x) - generate an 
approximate distinct from input column")
+@SuppressWarnings("deprecation")
+public class GenericUDAFApproximateDistinct extends 
AbstractGenericUDAFResolver {
+
+  /**
+   * Per-group aggregation state: one HyperLogLog sketch, rebuilt from the
+   * builder every time the buffer is reset.
+   */
+  static final class HyperLogLogBuffer extends AbstractAggregationBuffer {
+    /** Register index bits requested from the HyperLogLog builder. */
+    private static final int REGISTER_INDEX_BITS = 12;
+    /** Size hint returned by estimate(); the original note reads "4kb usually". */
+    private static final int ESTIMATED_BYTES = 4096;
+
+    public HyperLogLog hll;
+
+    public HyperLogLogBuffer() {
+      reset();
+    }
+
+    /** Drops the current sketch and replaces it with a newly built one. */
+    public void reset() {
+      this.hll = HyperLogLog.builder()
+          .setNumRegisterIndexBits(REGISTER_INDEX_BITS)
+          .build();
+    }
+
+    @Override
+    public int estimate() {
+      return ESTIMATED_BYTES;
+    }
+  }
+  
+  /**
+   * Base evaluator: folds input values into a HyperLogLog sketch and passes the
+   * sketch between aggregation stages as a serialized BINARY value.
+   */
+  public static class HyperLogLogEvaluator extends GenericUDAFEvaluator {
+
+    ObjectInspector inputOI;
+    WritableBinaryObjectInspector partialOI;
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+
+    /**
+     * Every mode of this base evaluator returns a BINARY column holding the
+     * serialized sketch.
+     *
+     * PARTIAL1 and COMPLETE are the modes that consume raw input rows through
+     * iterate(), so the inspector of the first parameter is remembered for
+     * both. PARTIAL2 and FINAL consume serialized sketches through merge() and
+     * only need the binary inspector.
+     *
+     * @param m          aggregation mode this evaluator instance will run in
+     * @param parameters inspectors of the incoming arguments for that mode
+     * @return the writable binary object inspector
+     * @throws HiveException if the superclass initialisation fails
+     * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator#init(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode, org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector[])
+     */
+    @Override
+    public ObjectInspector init(Mode m, ObjectInspector[] parameters)
+        throws HiveException {
+      super.init(m, parameters);
+      partialOI = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+      switch (m) {
+      case PARTIAL1:
+      case COMPLETE:
+        // iterate() dereferences inputOI, so it must be set whenever raw rows
+        // are aggregated -- previously COMPLETE left it null.
+        inputOI = parameters[0];
+        return partialOI;
+      case PARTIAL2:
+      case FINAL:
+        return partialOI;
+      default:
+        throw new IllegalArgumentException("Unknown UDAF mode " + m);
+      }
+    }
+
+    @Override
+    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+      return new HyperLogLogBuffer();
+    }
+
+    /**
+     * Adds one input value to the sketch. A null argument is ignored. The value
+     * is first converted to its standard Java form and then routed to the
+     * HyperLogLog add method matching its runtime type; anything without a
+     * dedicated branch is Java-serialized and added as raw bytes.
+     *
+     * @throws HiveException wrapping any IOException raised while serializing
+     *                       a value in the fallback branch
+     */
+    @Override
+    public void iterate(AggregationBuffer agg, Object[] args)
+        throws HiveException {
+      if (args[0] == null) {
+        return;
+      }
+      HyperLogLog hll = ((HyperLogLogBuffer)agg).hll;
+      // should use BinarySortableSerDe, perhaps
+      Object val = ObjectInspectorUtils.copyToStandardJavaObject(args[0], inputOI);
+      try {
+        if (val instanceof Byte || val instanceof Character || val instanceof Short) {
+          // NOTE(review): the value's own hashCode() is handed to add() as-is;
+          // confirm that add() is meant to receive un-mixed small integers.
+          hll.add(val.hashCode());
+        } else if (val instanceof Integer) {
+          hll.addInt(((Integer) val).intValue());
+        } else if (val instanceof Long) {
+          hll.addLong(((Long) val).longValue());
+        } else if (val instanceof Float) {
+          hll.addFloat(((Float) val).floatValue());
+        } else if (val instanceof Double) {
+          hll.addDouble((Double)val);
+        } else if (val instanceof String) {
+          hll.addString(val.toString());
+        } else if (val instanceof HiveDecimal) {
+          hll.addToEstimator((HiveDecimal)val);
+        } else if (val instanceof Date) {
+          hll.addInt(((Date)val).toEpochDay());
+        } else if (val instanceof Timestamp) {
+          hll.addLong(((Timestamp)val).toEpochMilli());
+        } else if (val instanceof HiveIntervalDayTime) {
+          hll.addLong(((HiveIntervalDayTime)val).getTotalSeconds());
+        } else if (val instanceof HiveBaseChar) {
+          hll.addString(((HiveBaseChar)val).toString());
+        } else {
+          /* potential multi-key option (does this ever get used?) */
+          output.reset();
+          ObjectOutputStream out = new ObjectOutputStream(output);
+          out.writeObject(val);
+          // make sure nothing is left buffered in the object stream before
+          // the bytes are read back from the shared buffer
+          out.flush();
+          byte[] key = output.toByteArray();
+          hll.addBytes(key);
+        }
+      } catch(IOException ioe) {
+        throw new HiveException(ioe);
+      }
+    }
+
+    /**
+     * Serializes the buffer's sketch so it can be shipped to the next stage.
+     *
+     * @return a BytesWritable containing the serialized sketch
+     */
+    @Override
+    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+      return serializeSketch(((HyperLogLogBuffer)agg).hll);
+    }
+
+    /**
+     * Merges a serialized partial sketch into the buffer's sketch. A null
+     * partial is ignored.
+     */
+    @Override
+    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+      if (partial == null) {
+        return;
+      }
+      final BytesWritable bw = partialOI.getPrimitiveWritableObject(partial);
+      HyperLogLog hll = ((HyperLogLogBuffer)agg).hll;
+      merge(hll, bw);
+    }
+
+    /**
+     * Deserializes the sketch held in {@code bw} and merges it into {@code hll}.
+     * Only the first {@code bw.getLength()} bytes of the backing array are read.
+     *
+     * @throws HiveException wrapping any IOException raised while deserializing
+     */
+    protected void merge(HyperLogLog hll, BytesWritable bw) throws HiveException {
+      try {
+        ByteArrayInputStream input = new ByteArrayInputStream(bw.getBytes(), 0, bw.getLength());
+        HyperLogLog hll2 = HyperLogLogUtils.deserializeHLL(input);
+        hll.merge(hll2);
+        input.close();
+      } catch (IOException ioe) {
+        throw new HiveException(ioe);
+      }
+    }
+
+    @Override
+    public void reset(AggregationBuffer agg) throws HiveException {
+      ((HyperLogLogBuffer)agg).reset();
+    }
+
+    /**
+     * Final result of the base evaluator: the serialized sketch itself, in the
+     * same form terminatePartial() produces.
+     */
+    @Override
+    public Object terminate(AggregationBuffer agg) throws HiveException {
+      return serializeSketch(((HyperLogLogBuffer)agg).hll);
+    }
+
+    /**
+     * Writes {@code hll} into the reusable byte buffer and returns a copy of
+     * the bytes wrapped in a BytesWritable.
+     */
+    private BytesWritable serializeSketch(HyperLogLog hll) throws HiveException {
+      output.reset();
+      try {
+        HyperLogLogUtils.serializeHLL(output, hll);
+      } catch(IOException ioe) {
+        throw new HiveException(ioe);
+      }
+      return new BytesWritable(output.toByteArray());
+    }
+  }
+
+  /**
+   * Resolves the evaluator from the parameter info by forwarding its argument
+   * types to {@link #getEvaluator(TypeInfo[])}.
+   */
+  @Override
+  public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
+      throws SemanticException {
+    final TypeInfo[] argumentTypes = info.getParameters();
+    return getEvaluator(argumentTypes);
+  }
+  
+  /**
+   * Evaluator behind approx_distinct: shares the sketch handling of
+   * HyperLogLogEvaluator but reports the sketch's count as a BIGINT once the
+   * aggregation is finished.
+   */
+  public static final class CountApproximateDistinctEvaluator extends HyperLogLogEvaluator {
+    /**
+     * Initialises the parent, then swaps the result inspector to a writable
+     * long for the modes that produce the final value (FINAL and COMPLETE).
+     * All other modes keep the inspector returned by the parent.
+     */
+    @Override
+    public ObjectInspector init(Mode m, ObjectInspector[] parameters)
+        throws HiveException {
+      final ObjectInspector sketchOI = super.init(m, parameters);
+      switch (m) {
+      case FINAL:
+      case COMPLETE:
+        return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+      default:
+        return sketchOI;
+      }
+    }
+
+    /** Returns the count reported by the buffer's sketch, wrapped in a LongWritable. */
+    @Override
+    public Object terminate(AggregationBuffer agg) throws HiveException {
+      final HyperLogLogBuffer buffer = (HyperLogLogBuffer) agg;
+      return new LongWritable(buffer.hll.count());
+    }
+  }
+  
+  /**
+   * Validates the argument types and returns the evaluator for approx_distinct.
+   *
+   * @param parameters type information of the call's arguments
+   * @return a new CountApproximateDistinctEvaluator
+   * @throws IllegalArgumentException if the call does not have exactly one argument
+   * @throws UDFArgumentTypeException if the argument is neither a primitive nor a struct
+   */
+  @Override
+  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+    if (parameters.length != 1) {
+      throw new IllegalArgumentException("Function only takes 1 parameter");
+    }
+    final ObjectInspector.Category category = parameters[0].getCategory();
+    if (category != ObjectInspector.Category.PRIMITIVE
+        && category != ObjectInspector.Category.STRUCT) {
+      // The rejected argument is parameters[0], so report argument id 0
+      // (the id was previously 1, which does not match any argument).
+      throw new UDFArgumentTypeException(0,
+          "Only primitive/struct rows are accepted but "
+              + parameters[0].getTypeName() + " was passed.");
+    }
+    return new CountApproximateDistinctEvaluator();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b9540e4/ql/src/test/queries/clientpositive/approx_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/approx_distinct.q 
b/ql/src/test/queries/clientpositive/approx_distinct.q
new file mode 100644
index 0000000..6ab4875
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/approx_distinct.q
@@ -0,0 +1,76 @@
+
+
+-- Test script for approx_distinct: runs the function over constants and over
+-- one column of each type declared in the table below, with the expected
+-- count spelled out in the first select column of every query.
+create temporary table random_types_table (
+cboolean boolean,
+cint int,
+cbigint bigint,
+cfloat float,
+cdouble double,
+cdecimal64 decimal(7,2),
+cdecimal128 decimal(38,18),
+cdate date,
+ctimestamp timestamp,
+cstring string,
+cvarchar varchar(3),
+cchar char(3)
+) stored as orc;
+
+-- Constant arguments, no FROM clause (all 1)
+select 'expect 1', approx_distinct(1);
+select 'expect 1', approx_distinct(1.0);
+select 'expect 1', approx_distinct(false); 
+select 'expect 1', approx_distinct('X');
+select 'expect 1', approx_distinct(current_date);
+select 'expect 1', approx_distinct(current_timestamp);
+select 'expect 1', approx_distinct(1.0BD);
+select 'expect 1', approx_distinct(INTERVAL '1' DAY);
+
+-- No rows (all 0)
+
+select 'expect 0', approx_distinct(cboolean) from random_types_table;
+select 'expect 0', approx_distinct(cint) from random_types_table;
+select 'expect 0', approx_distinct(cbigint) from random_types_table;
+select 'expect 0', approx_distinct(cfloat) from random_types_table;
+select 'expect 0', approx_distinct(cdouble) from random_types_table;
+select 'expect 0', approx_distinct(cdecimal64) from random_types_table;
+select 'expect 0', approx_distinct(cdecimal128) from random_types_table;
+select 'expect 0', approx_distinct(cdate) from random_types_table;
+select 'expect 0', approx_distinct(ctimestamp) from random_types_table;
+select 'expect 0', approx_distinct(cstring) from random_types_table;
+select 'expect 0', approx_distinct(cvarchar) from random_types_table;
+select 'expect 0', approx_distinct(cchar) from random_types_table;
+
+-- 1 row twice (all 1)
+
+insert into random_types_table values (true, 1, 1, 1.0, 1.0, 1.0BD, 1.0BD, 
'2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C'); 
+insert into random_types_table values (true, 1, 1, 1.0, 1.0, 1.0BD, 1.0BD, 
'2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C'); 
+
+select 'expect 1', approx_distinct(cboolean) from random_types_table;
+select 'expect 1', approx_distinct(cint) from random_types_table;
+select 'expect 1', approx_distinct(cbigint) from random_types_table;
+select 'expect 1', approx_distinct(cfloat) from random_types_table;
+select 'expect 1', approx_distinct(cdouble) from random_types_table;
+select 'expect 1', approx_distinct(cdecimal64) from random_types_table;
+select 'expect 1', approx_distinct(cdecimal128) from random_types_table;
+select 'expect 1', approx_distinct(cdate) from random_types_table;
+select 'expect 1', approx_distinct(ctimestamp) from random_types_table;
+select 'expect 1', approx_distinct(cstring) from random_types_table;
+select 'expect 1', approx_distinct(cvarchar) from random_types_table;
+select 'expect 1', approx_distinct(cchar) from random_types_table;
+
+
+
+-- Add a second row that differs from the first in every column
+insert into random_types_table values (false, 2, 2, 2.0, 2.0, 2.0BD, 2.0BD, 
'1999-12-31', '1999-12-31 00:00:01', 'X', 'Y', 'Z'); 
+
+-- 2 unique rows (all 2)
+select 'expect 2', approx_distinct(cboolean) from random_types_table;
+select 'expect 2', approx_distinct(cint) from random_types_table;
+select 'expect 2', approx_distinct(cbigint) from random_types_table;
+select 'expect 2', approx_distinct(cfloat) from random_types_table;
+select 'expect 2', approx_distinct(cdouble) from random_types_table;
+select 'expect 2', approx_distinct(cdecimal64) from random_types_table;
+select 'expect 2', approx_distinct(cdecimal128) from random_types_table;
+select 'expect 2', approx_distinct(cdate) from random_types_table;
+select 'expect 2', approx_distinct(ctimestamp) from random_types_table;
+select 'expect 2', approx_distinct(cstring) from random_types_table;
+select 'expect 2', approx_distinct(cvarchar) from random_types_table;
+select 'expect 2', approx_distinct(cchar) from random_types_table;

http://git-wip-us.apache.org/repos/asf/hive/blob/7b9540e4/ql/src/test/results/clientpositive/llap/approx_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/approx_distinct.q.out 
b/ql/src/test/results/clientpositive/llap/approx_distinct.q.out
new file mode 100644
index 0000000..e727819
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/approx_distinct.q.out
@@ -0,0 +1,490 @@
+PREHOOK: query: create temporary table random_types_table (
+cboolean boolean,
+cint int,
+cbigint bigint,
+cfloat float,
+cdouble double,
+cdecimal64 decimal(7,2),
+cdecimal128 decimal(38,18),
+cdate date,
+ctimestamp timestamp,
+cstring string,
+cvarchar varchar(3),
+cchar char(3)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@random_types_table
+POSTHOOK: query: create temporary table random_types_table (
+cboolean boolean,
+cint int,
+cbigint bigint,
+cfloat float,
+cdouble double,
+cdecimal64 decimal(7,2),
+cdecimal128 decimal(38,18),
+cdate date,
+ctimestamp timestamp,
+cstring string,
+cvarchar varchar(3),
+cchar char(3)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@random_types_table
+PREHOOK: query: select 'expect 1', approx_distinct(1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(1.0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(1.0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(false)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(false)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct('X')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct('X')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(current_date)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(current_date)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(current_timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(current_timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(1.0BD)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(1.0BD)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(INTERVAL '1' DAY)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(INTERVAL '1' DAY)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 0', approx_distinct(cboolean) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cboolean) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cint) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cint) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cbigint) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cbigint) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cfloat) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cfloat) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cdouble) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cdouble) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cdecimal64) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cdecimal64) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cdecimal128) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cdecimal128) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cdate) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cdate) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(ctimestamp) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(ctimestamp) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cstring) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cstring) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cvarchar) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cvarchar) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: select 'expect 0', approx_distinct(cchar) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 0', approx_distinct(cchar) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 0       0
+PREHOOK: query: insert into random_types_table values (true, 1, 1, 1.0, 1.0, 
1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@random_types_table
+POSTHOOK: query: insert into random_types_table values (true, 1, 1, 1.0, 1.0, 
1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@random_types_table
+POSTHOOK: Lineage: random_types_table.cbigint SCRIPT []
+POSTHOOK: Lineage: random_types_table.cboolean SCRIPT []
+POSTHOOK: Lineage: random_types_table.cchar SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdate SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdecimal128 SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdecimal64 SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdouble SCRIPT []
+POSTHOOK: Lineage: random_types_table.cfloat SCRIPT []
+POSTHOOK: Lineage: random_types_table.cint SCRIPT []
+POSTHOOK: Lineage: random_types_table.cstring SCRIPT []
+POSTHOOK: Lineage: random_types_table.ctimestamp SCRIPT []
+POSTHOOK: Lineage: random_types_table.cvarchar SCRIPT []
+PREHOOK: query: insert into random_types_table values (true, 1, 1, 1.0, 1.0, 
1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@random_types_table
+POSTHOOK: query: insert into random_types_table values (true, 1, 1, 1.0, 1.0, 
1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@random_types_table
+POSTHOOK: Lineage: random_types_table.cbigint SCRIPT []
+POSTHOOK: Lineage: random_types_table.cboolean SCRIPT []
+POSTHOOK: Lineage: random_types_table.cchar SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdate SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdecimal128 SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdecimal64 SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdouble SCRIPT []
+POSTHOOK: Lineage: random_types_table.cfloat SCRIPT []
+POSTHOOK: Lineage: random_types_table.cint SCRIPT []
+POSTHOOK: Lineage: random_types_table.cstring SCRIPT []
+POSTHOOK: Lineage: random_types_table.ctimestamp SCRIPT []
+POSTHOOK: Lineage: random_types_table.cvarchar SCRIPT []
+PREHOOK: query: select 'expect 1', approx_distinct(cboolean) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cboolean) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cint) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cint) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cbigint) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cbigint) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cfloat) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cfloat) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cdouble) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cdouble) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cdecimal64) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cdecimal64) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cdecimal128) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cdecimal128) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cdate) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cdate) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(ctimestamp) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(ctimestamp) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cstring) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cstring) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cvarchar) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cvarchar) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: select 'expect 1', approx_distinct(cchar) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 1', approx_distinct(cchar) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 1       1
+PREHOOK: query: insert into random_types_table values (false, 2, 2, 2.0, 2.0, 
2.0BD, 2.0BD, '1999-12-31', '1999-12-31 00:00:01', 'X', 'Y', 'Z')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@random_types_table
+POSTHOOK: query: insert into random_types_table values (false, 2, 2, 2.0, 2.0, 
2.0BD, 2.0BD, '1999-12-31', '1999-12-31 00:00:01', 'X', 'Y', 'Z')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@random_types_table
+POSTHOOK: Lineage: random_types_table.cbigint SCRIPT []
+POSTHOOK: Lineage: random_types_table.cboolean SCRIPT []
+POSTHOOK: Lineage: random_types_table.cchar SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdate SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdecimal128 SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdecimal64 SCRIPT []
+POSTHOOK: Lineage: random_types_table.cdouble SCRIPT []
+POSTHOOK: Lineage: random_types_table.cfloat SCRIPT []
+POSTHOOK: Lineage: random_types_table.cint SCRIPT []
+POSTHOOK: Lineage: random_types_table.cstring SCRIPT []
+POSTHOOK: Lineage: random_types_table.ctimestamp SCRIPT []
+POSTHOOK: Lineage: random_types_table.cvarchar SCRIPT []
+PREHOOK: query: select 'expect 2', approx_distinct(cboolean) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cboolean) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cint) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cint) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cbigint) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cbigint) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cfloat) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cfloat) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cdouble) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cdouble) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cdecimal64) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cdecimal64) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cdecimal128) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cdecimal128) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cdate) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cdate) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(ctimestamp) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(ctimestamp) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cstring) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cstring) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cvarchar) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cvarchar) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2
+PREHOOK: query: select 'expect 2', approx_distinct(cchar) from 
random_types_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 'expect 2', approx_distinct(cchar) from 
random_types_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@random_types_table
+#### A masked pattern was here ####
+expect 2       2

http://git-wip-us.apache.org/repos/asf/hive/blob/7b9540e4/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out 
b/ql/src/test/results/clientpositive/show_functions.q.out
index 9044ea4..0fdcbda 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -26,6 +26,7 @@ add_months
 aes_decrypt
 aes_encrypt
 and
+approx_distinct
 array
 array_contains
 ascii

Reply via email to