[ https://issues.apache.org/jira/browse/HIVE-2185?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13044142#comment-13044142 ]
Hudson commented on HIVE-2185: ------------------------------ Integrated in Hive-trunk-h0.21 #759 (See [https://builds.apache.org/hudson/job/Hive-trunk-h0.21/759/]) HIVE-2185. extend table statistics to store the size of uncompressed data (+extend interfaces for collecting other types of statistics) (Tomasz Nykiel via Ning Zhang) nzhang : http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1131106 Files : * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java * /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin2.q.out * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java * /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin4.q.out * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java * /hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java * /hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java * /hive/trunk/hbase-handler/src/test/results/hbase_stats.q.out * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java * /hive/trunk/ql/src/test/org/apache/hadoop/hive/serde2/TestSerDe.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsPublisher.java * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeStatsStruct.java * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java * /hive/trunk/hbase-handler/src/test/results/hbase_stats2.q.out * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java * 
/hive/trunk/hbase-handler/src/test/queries/hbase_stats.q * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java * /hive/trunk/ql/src/test/results/clientpositive/merge4.q.out * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java * /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsUtils.java * /hive/trunk/ql/src/test/queries/clientpositive/stats15.q * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java * /hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisher.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java * /hive/trunk/ql/src/test/results/clientpositive/stats14.q.out * /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsPublisher.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Stat.java * /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out * /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin3.q.out * /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin5.q.out * /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java * /hive/trunk/ql/src/test/results/clientpositive/union22.q.out * /hive/trunk/ql/src/test/results/clientpositive/filter_join_breaktask.q.out * /hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java * /hive/trunk/hbase-handler/src/test/queries/hbase_stats2.q * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java * /hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java * 
/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java * /hive/trunk/ql/src/test/results/clientpositive/pcr.q.out * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java * /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsAggregator.java * /hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java * /hive/trunk/ql/src/test/results/clientpositive/join_map_ppr.q.out * /hive/trunk/ql/src/test/results/clientpositive/merge3.q.out * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java * /hive/trunk/ql/src/test/queries/clientpositive/stats14.q * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java * /hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/TypedBytesSerDe.java * /hive/trunk/ql/src/test/results/clientpositive/sample10.q.out * /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsSetupConstants.java * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeStats.java * /hive/trunk/ql/src/test/results/clientpositive/stats11.q.out * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsAggregator.java * /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsSetupConst.java * /hive/trunk/ql/src/test/results/clientpositive/stats15.q.out * /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java * /hive/trunk/ql/src/test/results/clientpositive/combine2.q.out > extend table statistics to store the size of uncompressed data (+extend > interfaces for collecting other types of statistics) > ---------------------------------------------------------------------------------------------------------------------------- > > Key: HIVE-2185 > URL: https://issues.apache.org/jira/browse/HIVE-2185 > Project: Hive > Issue Type: New Feature > Components: Serializers/Deserializers, 
Statistics > Reporter: Tomasz Nykiel > Assignee: Tomasz Nykiel > Fix For: 0.8.0 > > Attachments: HIVE-2185.1.patch, HIVE-2185.2.patch, HIVE-2185.patch > > > Currently, when executing INSERT OVERWRITE and ANALYZE TABLE commands, we > collect statistics about the number of rows per partition/table. Other > statistics (e.g., total table/partition size) are derived from the file > system. > Here, we want to collect information about the sizes of uncompressed data, to > be able to determine the efficiency of compression. > Currently, a large part of the statistics collection mechanism is hardcoded and > not easily extensible to other statistics. > On top of adding the newly collected statistic, it would be desirable to extend > the collection mechanism, so that any new statistics can be added easily. -- This message is automatically generated by JIRA. For more information on JIRA, see: http://www.atlassian.com/software/jira