Modified: hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb?rev=1582546&r1=1582545&r2=1582546&view=diff ============================================================================== --- hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb (original) +++ hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb Thu Mar 27 23:40:03 2014 @@ -712,6 +712,52 @@ class BinaryColumnStatsData ::Thrift::Struct.generate_accessors self end +class Decimal + include ::Thrift::Struct, ::Thrift::Struct_Union + UNSCALED = 1 + SCALE = 3 + + FIELDS = { + UNSCALED => {:type => ::Thrift::Types::STRING, :name => 'unscaled', :binary => true}, + SCALE => {:type => ::Thrift::Types::I16, :name => 'scale'} + } + + def struct_fields; FIELDS; end + + def validate + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field unscaled is unset!') unless @unscaled + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field scale is unset!') unless @scale + end + + ::Thrift::Struct.generate_accessors self +end + +class DecimalColumnStatsData + include ::Thrift::Struct, ::Thrift::Struct_Union + LOWVALUE = 1 + HIGHVALUE = 2 + NUMNULLS = 3 + NUMDVS = 4 + + FIELDS = { + LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Decimal}, + HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Decimal}, + NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, + NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} + } + + def struct_fields; FIELDS; end + + def validate + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field lowValue is unset!') unless @lowValue + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field highValue is unset!') unless @highValue + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numNulls is unset!') unless @numNulls + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numDVs is unset!') unless @numDVs + end + + ::Thrift::Struct.generate_accessors self +end + class ColumnStatisticsData < ::Thrift::Union include ::Thrift::Struct_Union class << self @@ -734,6 +780,10 @@ class ColumnStatisticsData < ::Thrift::U def binaryStats(val) ColumnStatisticsData.new(:binaryStats, val) end + + def decimalStats(val) + ColumnStatisticsData.new(:decimalStats, val) + end end BOOLEANSTATS = 1 @@ -741,13 +791,15 @@ class ColumnStatisticsData < ::Thrift::U DOUBLESTATS = 3 STRINGSTATS = 4 BINARYSTATS = 5 + DECIMALSTATS = 6 FIELDS = { BOOLEANSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'booleanStats', :class => ::BooleanColumnStatsData}, LONGSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'longStats', :class => ::LongColumnStatsData}, DOUBLESTATS => {:type => ::Thrift::Types::STRUCT, :name => 'doubleStats', :class => ::DoubleColumnStatsData}, STRINGSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'stringStats', :class => ::StringColumnStatsData}, - BINARYSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'binaryStats', :class => ::BinaryColumnStatsData} + BINARYSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'binaryStats', :class => ::BinaryColumnStatsData}, + DECIMALSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'decimalStats', :class => ::DecimalColumnStatsData} } def struct_fields; FIELDS; end
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1582546&r1=1582545&r2=1582546&view=diff ============================================================================== --- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original) +++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Thu Mar 27 23:40:03 2014 @@ -937,8 +937,9 @@ class MetaStoreDirectSql { /** The common query part for table and partition stats */ private static final String STATS_COLLIST = "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", " - + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", " - + "\"AVG_COL_LEN\", \"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\""; + + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", " + + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"AVG_COL_LEN\", " + + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" "; private ColumnStatistics makeColumnStats( List<Object[]> list, ColumnStatisticsDesc csd, int offset) { @@ -948,7 +949,7 @@ class MetaStoreDirectSql { for (Object[] row : list) { // LastAnalyzed is stored per column but thrift has it per several; // get the lowest for now as nobody actually uses this field. - Object laObj = row[offset + 12]; + Object laObj = row[offset + 14]; if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > (Long)laObj)) { csd.setLastAnalyzed((Long)laObj); } @@ -957,10 +958,10 @@ class MetaStoreDirectSql { int i = offset; ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data); Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++], - nulls = row[i++], dist = row[i++], avglen = row[i++], maxlen = row[i++], - trues = row[i++], falses = row[i++]; + declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], + avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++]; StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data, - llow, lhigh, dlow, dhigh, nulls, dist, avglen, maxlen, trues, falses); + llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses); csos.add(cso); } result.setStatsObj(csos); Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java?rev=1582546&r1=1582545&r2=1582546&view=diff ============================================================================== --- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java (original) +++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java Thu Mar 27 23:40:03 2014 @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.metastore; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -27,6 +30,8 @@ import org.apache.hadoop.hive.metastore. import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; @@ -74,6 +79,11 @@ public class StatObjectConverter { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(), doubleStats.getLowValue(), doubleStats.getHighValue()); + } else if (statsObj.getStatsData().isSetDecimalStats()) { + DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats(); + String low = createJdoDecimalString(decimalStats.getLowValue()), + high = createJdoDecimalString(decimalStats.getHighValue()); + mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(), @@ -94,6 +104,8 @@ public class StatObjectConverter { oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); + oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue()); + oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); @@ -109,6 +121,8 @@ public class StatObjectConverter { oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); + oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue()); + oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); @@ -160,6 +174,13 @@ public class StatObjectConverter { doubleStats.setLowValue(mStatsObj.getDoubleLowValue()); doubleStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setDoubleStats(doubleStats); + } else if (colType.equals("decimal")) { + DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + decimalStats.setNumNulls(mStatsObj.getNumNulls()); + decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue())); + decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue())); + decimalStats.setNumDVs(mStatsObj.getNumDVs()); + colStatsData.setDecimalStats(decimalStats); } statsObj.setStatsData(colStatsData); return statsObj; @@ -203,6 +224,11 @@ public class StatObjectConverter { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(), doubleStats.getLowValue(), doubleStats.getHighValue()); + } else if (statsObj.getStatsData().isSetDecimalStats()) { + DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats(); + String low = createJdoDecimalString(decimalStats.getLowValue()), + high = createJdoDecimalString(decimalStats.getHighValue()); + mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(), @@ -259,6 +285,13 @@ public class StatObjectConverter { doubleStats.setLowValue(mStatsObj.getDoubleLowValue()); doubleStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setDoubleStats(doubleStats); + } else if (colType.equals("decimal")) { + DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + decimalStats.setNumNulls(mStatsObj.getNumNulls()); + decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue())); + decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue())); + decimalStats.setNumDVs(mStatsObj.getNumDVs()); + colStatsData.setDecimalStats(decimalStats); } statsObj.setStatsData(colStatsData); return statsObj; @@ -277,8 +310,8 @@ public class StatObjectConverter { // SQL public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, - Object llow, Object lhigh, Object dlow, Object dhigh, Object nulls, Object dist, - Object avglen, Object maxlen, Object trues, Object falses) { + Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, + Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) { if (colType.equals("boolean")) { BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); boolStats.setNumFalses((Long)falses); @@ -315,6 +348,22 @@ public class StatObjectConverter { doubleStats.setLowValue((Double)dlow); doubleStats.setNumDVs((Long)dist); data.setDoubleStats(doubleStats); + } else if (colType.equals("decimal")) { + DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + decimalStats.setNumNulls((Long)nulls); + decimalStats.setHighValue(createThriftDecimal((String)dechigh)); + decimalStats.setLowValue(createThriftDecimal((String)declow)); + decimalStats.setNumDVs((Long)dist); + data.setDecimalStats(decimalStats); } } + + private static Decimal createThriftDecimal(String s) { + BigDecimal d = new BigDecimal(s); + return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale()); + } + + private static String createJdoDecimalString(Decimal d) { + return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale()).toString(); + } } Modified: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java?rev=1582546&r1=1582545&r2=1582546&view=diff ============================================================================== --- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java (original) +++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java Thu Mar 27 23:40:03 2014 @@ -44,6 +44,8 @@ public class MPartitionColumnStatistics private long longHighValue; private double doubleLowValue; private double doubleHighValue; + private String decimalLowValue; + private String decimalHighValue; private long numNulls; private long numDVs; private double avgColLen; @@ -178,6 +180,14 @@ public class MPartitionColumnStatistics this.doubleHighValue = highValue; } + public void setDecimalStats( + long numNulls, long numNDVs, String lowValue, String highValue) { + this.numNulls = numNulls; + this.numDVs = numNDVs; + this.decimalLowValue = lowValue; + this.decimalHighValue = highValue; + } + public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) { this.numNulls = numNulls; this.numDVs = numNDVs; @@ -221,4 +231,20 @@ public class MPartitionColumnStatistics public void setDoubleHighValue(double doubleHighValue) { this.doubleHighValue = doubleHighValue; } + + public String getDecimalLowValue() { + return decimalLowValue; + } + + public void setDecimalLowValue(String decimalLowValue) { + this.decimalLowValue = decimalLowValue; + } + + public String getDecimalHighValue() { + return decimalHighValue; + } + + public void setDecimalHighValue(String decimalHighValue) { + this.decimalHighValue = decimalHighValue; + } } Modified: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java?rev=1582546&r1=1582545&r2=1582546&view=diff ============================================================================== --- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java (original) +++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java Thu Mar 27 23:40:03 2014 @@ -42,6 +42,8 @@ public class MTableColumnStatistics { private long longHighValue; private double doubleLowValue; private double doubleHighValue; + private String decimalLowValue; + private String decimalHighValue; private long numNulls; private long numDVs; private double avgColLen; @@ -168,6 +170,14 @@ public class MTableColumnStatistics { this.doubleHighValue = highValue; } + public void setDecimalStats( + long numNulls, long numNDVs, String lowValue, String highValue) { + this.numNulls = numNulls; + this.numDVs = numNDVs; + this.decimalLowValue = lowValue; + this.decimalHighValue = highValue; + } + public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) { this.numNulls = numNulls; this.numDVs = numNDVs; @@ -212,4 +222,21 @@ public class MTableColumnStatistics { public void setDoubleHighValue(double doubleHighValue) { this.doubleHighValue = doubleHighValue; } + + + public String getDecimalLowValue() { + return decimalLowValue; + } + + public void setDecimalLowValue(String decimalLowValue) { + this.decimalLowValue = decimalLowValue; + } + + public String getDecimalHighValue() { + return decimalHighValue; + } + + public void setDecimalHighValue(String decimalHighValue) { + this.decimalHighValue = decimalHighValue; + } } Modified: hive/trunk/metastore/src/model/package.jdo URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/package.jdo?rev=1582546&r1=1582545&r2=1582546&view=diff ============================================================================== --- hive/trunk/metastore/src/model/package.jdo (original) +++ hive/trunk/metastore/src/model/package.jdo Thu Mar 27 23:40:03 2014 @@ -825,6 +825,12 @@ <field name="doubleHighValue"> <column name="DOUBLE_HIGH_VALUE" jdbc-type="DOUBLE" allows-null="true"/> </field> + <field name="decimalLowValue"> + <column name="BIG_DECIMAL_LOW_VALUE" jdbc-type="VARCHAR" allows-null="true"/> + </field> + <field name="decimalHighValue"> + <column name="BIG_DECIMAL_HIGH_VALUE" jdbc-type="VARCHAR" allows-null="true"/> + </field> <field name="numNulls"> <column name="NUM_NULLS" jdbc-type="BIGINT" allows-null="false"/> </field> @@ -883,6 +889,12 @@ <field name="doubleHighValue"> <column name="DOUBLE_HIGH_VALUE" jdbc-type="DOUBLE" allows-null="true"/> </field> + <field name="decimalLowValue"> + <column name="BIG_DECIMAL_LOW_VALUE" jdbc-type="VARCHAR" allows-null="true"/> + </field> + <field name="decimalHighValue"> + <column name="BIG_DECIMAL_HIGH_VALUE" jdbc-type="VARCHAR" allows-null="true"/> + </field> <field name="numNulls"> <column name="NUM_NULLS" jdbc-type="BIGINT" allows-null="false"/> </field> Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java?rev=1582546&r1=1582545&r2=1582546&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java Thu Mar 27 23:40:03 2014 @@ -20,11 +20,15 @@ package org.apache.hadoop.hive.ql.exec; import java.io.IOException; import java.io.Serializable; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; @@ -32,6 +36,8 @@ import org.apache.hadoop.hive.metastore. import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; @@ -48,6 +54,7 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.mapred.JobConf; @@ -110,6 +117,27 @@ public class ColumnStatsTask extends Tas } } + private void unpackDecimalStats(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj statsObj) { + if (fName.equals("countnulls")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumNulls(v); + } else if (fName.equals("numdistinctvalues")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumDVs(v); + } else if (fName.equals("max")) { + HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(d)); + } else if (fName.equals("min")) { + HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d)); + } + } + + private Decimal convertToThriftDecimal(HiveDecimal d) { + return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale()); + } + private void unpackLongStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { if (fName.equals("countnulls")) { @@ -186,6 +214,10 @@ public class ColumnStatsTask extends Tas BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); statsData.setBinaryStats(binaryStats); statsObj.setStatsData(statsData); + } else if (s.equalsIgnoreCase("decimal")) { + DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + statsData.setDecimalStats(decimalStats); + statsObj.setStatsData(statsData); } } else { // invoke the right unpack method depending on data type of the column @@ -199,6 +231,8 @@ public class ColumnStatsTask extends Tas unpackStringStats(oi, o, fieldName, statsObj); } else if (statsObj.getStatsData().isSetBinaryStats()) { unpackBinaryStats(oi, o, fieldName, statsObj); + } else if (statsObj.getStatsData().isSetDecimalStats()) { + unpackDecimalStats(oi, o, fieldName, statsObj); } } } Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java?rev=1582546&view=auto ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java (added) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java Thu Mar 27 23:40:03 2014 @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.type.HiveDecimal; + +public class DecimalNumDistinctValueEstimator extends NumDistinctValueEstimator { + + public DecimalNumDistinctValueEstimator(int numBitVectors) { + super(numBitVectors); + } + + public DecimalNumDistinctValueEstimator(String s, int numBitVectors) { + super(s, numBitVectors); + } + + public void addToEstimator(HiveDecimal decimal) { + int v = decimal.hashCode(); + super.addToEstimator(v); + } + + public void addToEstimatorPCSA(HiveDecimal decimal) { + int v = decimal.hashCode(); + super.addToEstimatorPCSA(v); + } +} Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1582546&r1=1582545&r2=1582546&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Thu Mar 27 23:40:03 2014 @@ -17,28 +17,26 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.BytesWritable; @@ -88,9 +86,11 @@ public class GenericUDAFComputeStats ext return new GenericUDAFStringStatsEvaluator(); case BINARY: return new GenericUDAFBinaryStatsEvaluator(); + case DECIMAL: + return new GenericUDAFDecimalStatsEvaluator(); default: throw new UDFArgumentTypeException(0, - "Only integer/long/timestamp/float/double/string/binary/boolean type argument " + + "Only integer/long/timestamp/float/double/string/binary/boolean/decimal type argument " + "is accepted but " + parameters[0].getTypeName() + " is passed."); } @@ -1474,4 +1474,305 @@ public class GenericUDAFComputeStats ext return result; } } + + public static class GenericUDAFDecimalStatsEvaluator extends GenericUDAFEvaluator { + + /* + * Object Inspector corresponding to the input parameter. + */ + private transient PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector numVectorsOI; + private final static int MAX_BIT_VECTORS = 1024; + + /* Partial aggregation result returned by TerminatePartial. Partial result is a struct + * containing a long field named "count". + */ + private transient Object[] partialResult; + + /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long + * field within the struct - "count" + */ + private transient StructObjectInspector soi; + + private transient StructField minField; + private transient WritableHiveDecimalObjectInspector minFieldOI; + + private transient StructField maxField; + private transient WritableHiveDecimalObjectInspector maxFieldOI; + + private transient StructField countNullsField; + private transient WritableLongObjectInspector countNullsFieldOI; + + private transient StructField ndvField; + private transient WritableStringObjectInspector ndvFieldOI; + + private transient StructField numBitVectorsField; + private transient WritableIntObjectInspector numBitVectorsFieldOI; + + /* Output of final result of the aggregation + */ + private transient Object[] result; + + private boolean warned = false; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + // initialize input + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; + numVectorsOI = (PrimitiveObjectInspector) parameters[1]; + } else { + soi = (StructObjectInspector) parameters[0]; + + minField = soi.getStructFieldRef("Min"); + minFieldOI = (WritableHiveDecimalObjectInspector) minField.getFieldObjectInspector(); + + maxField = soi.getStructFieldRef("Max"); + maxFieldOI = (WritableHiveDecimalObjectInspector) maxField.getFieldObjectInspector(); + + countNullsField = soi.getStructFieldRef("CountNulls"); + countNullsFieldOI = (WritableLongObjectInspector) countNullsField.getFieldObjectInspector(); + + ndvField = soi.getStructFieldRef("BitVector"); + ndvFieldOI = (WritableStringObjectInspector) ndvField.getFieldObjectInspector(); + + numBitVectorsField = soi.getStructFieldRef("NumBitVectors"); + numBitVectorsFieldOI = (WritableIntObjectInspector) + numBitVectorsField.getFieldObjectInspector(); + } + + // initialize output + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + List<ObjectInspector> foi = new ArrayList<ObjectInspector>(); + foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); + + List<String> fname = new ArrayList<String>(); + fname.add("ColumnType"); + fname.add("Min"); + fname.add("Max"); + fname.add("CountNulls"); + fname.add("BitVector"); + fname.add("NumBitVectors"); + + partialResult = new Object[6]; + partialResult[0] = new Text(); + partialResult[1] = new HiveDecimalWritable(HiveDecimal.create(0)); + partialResult[2] = new HiveDecimalWritable(HiveDecimal.create(0)); + partialResult[3] = new LongWritable(0); + partialResult[4] = new Text(); + partialResult[5] = new IntWritable(0); + + return ObjectInspectorFactory.getStandardStructObjectInspector(fname, + foi); + } else { + List<ObjectInspector> foi = new ArrayList<ObjectInspector>(); + foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + + List<String> fname = new ArrayList<String>(); + fname.add("ColumnType"); + fname.add("Min"); + fname.add("Max"); + fname.add("CountNulls"); + fname.add("NumDistinctValues"); + + result = new Object[5]; + result[0] = new Text(); + result[1] = new HiveDecimalWritable(HiveDecimal.create(0)); + result[2] = new HiveDecimalWritable(HiveDecimal.create(0)); + result[3] = new LongWritable(0); + result[4] = new LongWritable(0); + + return ObjectInspectorFactory.getStandardStructObjectInspector(fname, + foi); + } + } + + @AggregationType(estimable = true) + public static class DecimalStatsAgg extends AbstractAggregationBuffer { + public String columnType; + public HiveDecimal min; /* Minimum value seen so far */ + public HiveDecimal max; /* Maximum value seen so far */ + public long countNulls; /* Count of number of null values seen so far */ + public DecimalNumDistinctValueEstimator numDV; /* Distinct value estimator */ + public boolean firstItem; /* First item in the aggBuf? */ + public int numBitVectors; + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return model.primitive1() * 2 + model.primitive2() + model.lengthOfDecimal() * 2 + + model.lengthFor(columnType) + model.lengthFor(numDV); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + DecimalStatsAgg result = new DecimalStatsAgg(); + reset(result); + return result; + } + + public void initNDVEstimator(DecimalStatsAgg aggBuffer, int numBitVectors) { + aggBuffer.numDV = new DecimalNumDistinctValueEstimator(numBitVectors); + aggBuffer.numDV.reset(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + DecimalStatsAgg myagg = (DecimalStatsAgg) agg; + myagg.columnType = new String("Decimal"); + myagg.min = HiveDecimal.create(0); + myagg.max = HiveDecimal.create(0); + myagg.countNulls = 0; + myagg.firstItem = true; + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + Object p = parameters[0]; + DecimalStatsAgg myagg = (DecimalStatsAgg) agg; + boolean emptyTable = false; + + if (parameters[1] == null) { + emptyTable = true; + } + + if (myagg.firstItem) { + int numVectors = 0; + if (!emptyTable) { + numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); + } + + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + + " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + } + + initNDVEstimator(myagg, numVectors); + myagg.firstItem = false; + myagg.numBitVectors = numVectors; + } + + if (!emptyTable) { + + //Update null counter if a null value is seen + if (p == null) { + myagg.countNulls++; + } + else { + try { + + HiveDecimal v = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI); + + //Update min counter if new value is less than min seen so far + if (v.compareTo(myagg.min) < 0) { + myagg.min = v; + } + + //Update max counter if new value is greater than max seen so far + if (v.compareTo(myagg.max) > 0) { + myagg.max = v; + } + + // Add value to NumDistinctValue Estimator + myagg.numDV.addToEstimator(v); + + } catch (NumberFormatException e) { + if (!warned) { + warned = true; + LOG.warn(getClass().getSimpleName() + " " + + StringUtils.stringifyException(e)); + LOG.warn(getClass().getSimpleName() + + " ignoring similar exceptions."); + } + } + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + DecimalStatsAgg myagg = (DecimalStatsAgg) agg; + + // Serialize numDistinctValue Estimator + Text t = myagg.numDV.serialize(); + + // Serialize the rest of the values in the AggBuffer + ((Text) partialResult[0]).set(myagg.columnType); + ((HiveDecimalWritable) partialResult[1]).set(myagg.min); + ((HiveDecimalWritable) partialResult[2]).set(myagg.max); + ((LongWritable) partialResult[3]).set(myagg.countNulls); + ((Text) partialResult[4]).set(t); + ((IntWritable) partialResult[5]).set(myagg.numBitVectors); + + return partialResult; + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial != null) { + DecimalStatsAgg myagg = (DecimalStatsAgg) agg; + + if (myagg.firstItem) { + Object partialValue = soi.getStructFieldData(partial, numBitVectorsField); + int numVectors = numBitVectorsFieldOI.get(partialValue); + initNDVEstimator(myagg, numVectors); + myagg.firstItem = false; + myagg.numBitVectors = numVectors; + } + + // Update min if min is lesser than the smallest value seen so far + Object partialValue = soi.getStructFieldData(partial, minField); + if (myagg.min.compareTo(minFieldOI.getPrimitiveJavaObject(partialValue)) > 0) { + myagg.min = minFieldOI.getPrimitiveJavaObject(partialValue); + } + + // Update max if max is greater than the largest value seen so far + partialValue = soi.getStructFieldData(partial, maxField); + if (myagg.max.compareTo(maxFieldOI.getPrimitiveJavaObject(partialValue)) < 0) { + myagg.max = maxFieldOI.getPrimitiveJavaObject(partialValue); + } + + // Update the null counter + partialValue = soi.getStructFieldData(partial, countNullsField); + myagg.countNulls += countNullsFieldOI.get(partialValue); + + // Merge numDistinctValue Estimators + partialValue = soi.getStructFieldData(partial, ndvField); + String v = ndvFieldOI.getPrimitiveJavaObject(partialValue); + + NumDistinctValueEstimator o = new NumDistinctValueEstimator(v, myagg.numBitVectors); + myagg.numDV.mergeEstimators(o); + } + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + DecimalStatsAgg myagg = (DecimalStatsAgg) agg; + long numDV = 0; + + if (myagg.numBitVectors != 0) { + numDV = myagg.numDV.estimateNumDistinctValues(); + } + + // Serialize the result struct + ((Text) result[0]).set(myagg.columnType); + ((HiveDecimalWritable) result[1]).set(myagg.min); + ((HiveDecimalWritable) result[2]).set(myagg.max); + ((LongWritable) result[3]).set(myagg.countNulls); + ((LongWritable) result[4]).set(numDV); + + return result; + } + } } Added: hive/trunk/ql/src/test/queries/clientpositive/compute_stats_decimal.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/compute_stats_decimal.q?rev=1582546&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/compute_stats_decimal.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/compute_stats_decimal.q Thu Mar 27 23:40:03 2014 @@ -0,0 +1,11 @@ +set hive.stats.autogather=true; + +create table tab_decimal(a decimal(10,3)); + +-- insert some data +LOAD DATA LOCAL INPATH "../../data/files/decimal.txt" INTO TABLE tab_decimal; + +select count(*) from tab_decimal; + +-- compute statistical summary of data +select compute_stats(a, 18) from tab_decimal; Added: hive/trunk/ql/src/test/results/clientpositive/compute_stats_decimal.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/compute_stats_decimal.q.out?rev=1582546&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/compute_stats_decimal.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/compute_stats_decimal.q.out Thu Mar 27 23:40:03 2014 @@ -0,0 +1,37 @@ +PREHOOK: query: create table tab_decimal(a decimal(10,3)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table tab_decimal(a decimal(10,3)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_decimal +PREHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../../data/files/decimal.txt" INTO TABLE tab_decimal +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tab_decimal +POSTHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../../data/files/decimal.txt" INTO TABLE tab_decimal +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tab_decimal +PREHOOK: query: select count(*) from tab_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@tab_decimal +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab_decimal +#### A masked pattern was here #### +18 +PREHOOK: query: -- compute statistical summary of data +select compute_stats(a, 18) from tab_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@tab_decimal +#### A masked pattern was here #### +POSTHOOK: query: -- compute statistical summary of data +select compute_stats(a, 18) from tab_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab_decimal +#### A masked pattern was here #### +{"columntype":"Decimal","min":-87.2,"max":435.331,"countnulls":2,"numdistinctvalues":13}
