karanmehta93 commented on a change in pull request #419: PHOENIX-4009 Run 
UPDATE STATISTICS command by using MR integration on…
URL: https://github.com/apache/phoenix/pull/419#discussion_r246261784
 
 

 ##########
 File path: 
phoenix-core/src/main/java/org/apache/phoenix/schema/stats/DefaultStatisticsCollector.java
 ##########
 @@ -220,68 +186,62 @@ public void close() throws IOException {
     }
 
     @Override
-    public void updateStatistic(Region region, Scan scan) {
+    public void updateStatistics(Region region, Scan scan) {
         try {
-            ArrayList<Mutation> mutations = new ArrayList<Mutation>();
-            writeStatistics(region, true, mutations, 
EnvironmentEdgeManager.currentTimeMillis(), scan);
-            if (logger.isDebugEnabled()) {
-                logger.debug("Committing new stats for the region " + 
region.getRegionInfo());
-            }
+            List<Mutation> mutations = new ArrayList<Mutation>();
+            writeStatistics(region, true, mutations,
+                    EnvironmentEdgeManager.currentTimeMillis(), scan);
             commitStats(mutations);
         } catch (IOException e) {
-            logger.error("Unable to commit new stats", e);
+            LOG.error("Unable to update SYSTEM.STATS table.", e);
         }
     }
 
     private void writeStatistics(final Region region, boolean delete, 
List<Mutation> mutations, long currentTime, Scan scan)
             throws IOException {
-        try {
-            Set<ImmutableBytesPtr> fams = guidePostsInfoWriterMap.keySet();
-            // Update the statistics table.
-            // Delete statistics for a region if no guide posts are collected 
for that region during
-            // UPDATE STATISTICS. This will not impact a stats collection of 
single column family during
-            // compaction as guidePostsInfoWriterMap cannot be empty in this 
case.
-            if (cachedGuidePosts == null) {
-                // We're either collecting stats for the data table or the 
local index table, but not both
-                // We can determine this based on the column families in the 
scan being prefixed with the
-                // local index column family prefix. We always explicitly 
specify the local index column
-                // families when we're collecting stats for a local index.
-                boolean collectingForLocalIndex = scan != null && 
!scan.getFamilyMap().isEmpty() && 
MetaDataUtil.isLocalIndexFamily(scan.getFamilyMap().keySet().iterator().next());
-                for (Store store : region.getStores()) {
-                    ImmutableBytesPtr cfKey = new 
ImmutableBytesPtr(store.getFamily().getName());
-                    boolean isLocalIndexStore = 
MetaDataUtil.isLocalIndexFamily(cfKey);
-                    if (isLocalIndexStore != collectingForLocalIndex) {
-                        continue;
-                    }
-                    if (!guidePostsInfoWriterMap.containsKey(cfKey)) {
-                        Pair<Long, GuidePostsInfoBuilder> emptyGps = new 
Pair<Long, GuidePostsInfoBuilder>(0l, new GuidePostsInfoBuilder());
-                        guidePostsInfoWriterMap.put(cfKey, emptyGps);
-                    }
+        Set<ImmutableBytesPtr> fams = guidePostsInfoWriterMap.keySet();
+        // Update the statistics table.
+        // Delete statistics for a region if no guide posts are collected for 
that region during
+        // UPDATE STATISTICS. This will not impact a stats collection of 
single column family during
+        // compaction as guidePostsInfoWriterMap cannot be empty in this case.
+        if (cachedGuidePosts == null) {
+            // We're either collecting stats for the data table or the local 
index table, but not both
+            // We can determine this based on the column families in the scan 
being prefixed with the
+            // local index column family prefix. We always explicitly specify 
the local index column
+            // families when we're collecting stats for a local index.
+            boolean collectingForLocalIndex = scan != null &&
+                    !scan.getFamilyMap().isEmpty() &&
+                    
MetaDataUtil.isLocalIndexFamily(scan.getFamilyMap().keySet().iterator().next());
+            for (Store store : region.getStores()) {
+                ImmutableBytesPtr cfKey = new 
ImmutableBytesPtr(store.getFamily().getName());
+                boolean isLocalIndexStore = 
MetaDataUtil.isLocalIndexFamily(cfKey);
+                if (isLocalIndexStore != collectingForLocalIndex) {
+                    continue;
                 }
-            }
-            for (ImmutableBytesPtr fam : fams) {
-                if (delete) {
-                    if (logger.isDebugEnabled()) {
-                        logger.debug("Deleting the stats for the region " + 
region.getRegionInfo());
-                    }
-                    statsWriter.deleteStatsForRegion(region, this, fam, 
mutations);
-                }
-                if (logger.isDebugEnabled()) {
-                    logger.debug("Adding new stats for the region " + 
region.getRegionInfo());
-                }
-                // If we've disabled stats, don't write any, just delete them
-                if (this.guidePostDepth > 0) {
-                    statsWriter.addStats(this, fam, mutations);
+                if (!guidePostsInfoWriterMap.containsKey(cfKey)) {
+                    Pair<Long, GuidePostsInfoBuilder> emptyGps = new 
Pair<Long, GuidePostsInfoBuilder>(0l, new GuidePostsInfoBuilder());
+                    guidePostsInfoWriterMap.put(cfKey, emptyGps);
                 }
             }
-        } catch (IOException e) {
-            logger.error("Failed to update statistics table!", e);
-            throw e;
+        }
+        for (ImmutableBytesPtr fam : fams) {
+            if (delete) {
 
 Review comment:
   All the log lines were changed to INFO since they will be produced only once 
per mapper (and a mapper would correspond to a region). These lines would 
really help us in debugging if something goes wrong. We can also export these 
as metrics if required.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to