[ 
https://issues.apache.org/jira/browse/HIVE-2185?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13044142#comment-13044142
 ] 

Hudson commented on HIVE-2185:
------------------------------

Integrated in Hive-trunk-h0.21 #759 (See [https://builds.apache.org/hudson/job/Hive-trunk-h0.21/759/])
    HIVE-2185. extend table statistics to store the size of uncompressed data 
(+extend interfaces for collecting other types of statistics) (Tomasz Nykiel 
via Ning Zhang)

nzhang : http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1131106
Files : 
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java
* /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin2.q.out
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java
* /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin4.q.out
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java
* /hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
* /hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java
* /hive/trunk/hbase-handler/src/test/results/hbase_stats.q.out
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
* /hive/trunk/ql/src/test/org/apache/hadoop/hive/serde2/TestSerDe.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsPublisher.java
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeStatsStruct.java
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
* /hive/trunk/hbase-handler/src/test/results/hbase_stats2.q.out
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java
* /hive/trunk/hbase-handler/src/test/queries/hbase_stats.q
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
* /hive/trunk/ql/src/test/results/clientpositive/merge4.q.out
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
* /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsUtils.java
* /hive/trunk/ql/src/test/queries/clientpositive/stats15.q
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java
* /hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisher.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
* /hive/trunk/ql/src/test/results/clientpositive/stats14.q.out
* /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsPublisher.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Stat.java
* /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out
* /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin3.q.out
* /hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
* /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
* /hive/trunk/ql/src/test/results/clientpositive/union22.q.out
* /hive/trunk/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
* /hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
* /hive/trunk/hbase-handler/src/test/queries/hbase_stats2.q
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
* /hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
* /hive/trunk/ql/src/test/results/clientpositive/pcr.q.out
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java
* /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsAggregator.java
* /hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
* /hive/trunk/ql/src/test/results/clientpositive/join_map_ppr.q.out
* /hive/trunk/ql/src/test/results/clientpositive/merge3.q.out
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
* /hive/trunk/ql/src/test/queries/clientpositive/stats14.q
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
* /hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/TypedBytesSerDe.java
* /hive/trunk/ql/src/test/results/clientpositive/sample10.q.out
* /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsSetupConstants.java
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeStats.java
* /hive/trunk/ql/src/test/results/clientpositive/stats11.q.out
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsAggregator.java
* /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsSetupConst.java
* /hive/trunk/ql/src/test/results/clientpositive/stats15.q.out
* /hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java
* /hive/trunk/ql/src/test/results/clientpositive/combine2.q.out


> extend table statistics to store the size of uncompressed data (+extend 
> interfaces for collecting other types of statistics)
> ----------------------------------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-2185
>                 URL: https://issues.apache.org/jira/browse/HIVE-2185
>             Project: Hive
>          Issue Type: New Feature
>          Components: Serializers/Deserializers, Statistics
>            Reporter: Tomasz Nykiel
>            Assignee: Tomasz Nykiel
>             Fix For: 0.8.0
>
>         Attachments: HIVE-2185.1.patch, HIVE-2185.2.patch, HIVE-2185.patch
>
>
> Currently, when executing INSERT OVERWRITE and ANALYZE TABLE commands we 
> collect statistics about the number of rows per partition/table. Other 
> statistics (e.g., total table/partition size) are derived from the file 
> system. 
> Here, we want to collect information about the sizes of uncompressed data, to 
> be able to determine the efficiency of compression.
> Currently, a large part of the statistics collection mechanism is hardcoded 
> and not easily extensible to other statistics.
> In addition to adding the new statistic, it would be desirable to extend 
> the collection mechanism so that any new statistics can be added easily.

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to