eab148 commented on code in PR #6327:
URL: https://github.com/apache/hbase/pull/6327#discussion_r1803458112


##########
hbase-examples/src/main/java/org/apache/hadoop/hbase/coprocessor/example/row/stats/RowStatisticsImpl.java:
##########
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.coprocessor.example.row.stats;
+
+import java.util.Map;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.RawCellBuilder;
+import org.apache.hadoop.hbase.coprocessor.example.row.stats.utils.RowStatisticsUtil;
+import org.apache.hadoop.hbase.regionserver.Shipper;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.GsonUtil;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.com.google.gson.Gson;
+import org.apache.hbase.thirdparty.com.google.gson.JsonObject;
+
+/**
+ * Holder for accumulating row statistics in {@link RowStatisticsCompactionObserver}. Creates
+ * various cell, row, and total stats.
+ */
[email protected]
+public class RowStatisticsImpl implements RowStatistics {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RowStatisticsImpl.class);
+  private static final Gson GSON = GsonUtil.createGson().create();
+
+  //
+  // Transient fields which are not included in gson serialization
+  //
+  private final transient long blockSize;
+  private final transient long maxCacheSize;
+  private transient int rowCells;
+  private transient long rowBytes;
+  private transient byte[] largestRow;
+  private transient Cell largestCell;
+  private final transient boolean isMajor;
+  private final transient SizeBucketTracker rowSizeBuckets;
+  private final transient SizeBucketTracker valueSizeBuckets;
+
+  // We don't need to clone anything until shipped() is called on scanner.
+  // To avoid allocations, we keep a reference until that point
+  private transient Cell largestRowRef;
+  private transient Cell largestCellRef;
+  //
+  // Non-transient fields which are included in gson
+  //
+  private final String table;
+  private final String region;
+  private final String columnFamily;
+  private long largestRowBytes;
+  private int largestRowCells;
+  private long largestCellBytes;
+  private int cellsLargerThanOneBlock;
+  private int rowsLargerThanOneBlock;
+  private int cellsLargerThanMaxCacheSize;
+  private int totalDeletes;
+  private int totalCells;
+  private int totalRows;
+  private long totalBytes;
+
+  RowStatisticsImpl(String table, String encodedRegion, String columnFamily, 
long blockSize,
+    long maxCacheSize, boolean isMajor) {
+    this.table = table;
+    this.region = encodedRegion;
+    this.columnFamily = columnFamily;
+    this.blockSize = blockSize;
+    this.maxCacheSize = maxCacheSize;
+    this.isMajor = isMajor;
+    this.rowSizeBuckets = new SizeBucketTracker();
+    this.valueSizeBuckets = new SizeBucketTracker();
+  }
+
+  public void handleRowChanged(Cell lastCell) {
+    if (rowBytes > largestRowBytes) {
+      largestRowRef = lastCell;
+      largestRowBytes = rowBytes;
+      largestRowCells = rowCells;
+    }
+    if (rowBytes > blockSize) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("RowTooLarge: rowBytes={}, blockSize={}, table={}, 
rowKey={}", rowBytes,
+          blockSize, table, Bytes.toStringBinary(lastCell.getRowArray(), 
lastCell.getRowOffset(),
+            lastCell.getRowLength()));
+      }
+      rowsLargerThanOneBlock++;
+    }
+    rowSizeBuckets.add(rowBytes);
+    rowBytes = 0;
+    rowCells = 0;
+    totalRows++;
+  }
+
+  public void consumeCell(Cell cell) {
+    int cellSize = cell.getSerializedSize();
+
+    rowBytes += cellSize;
+    rowCells++;
+
+    boolean tooLarge = false;
+    if (cellSize > maxCacheSize) {
+      cellsLargerThanMaxCacheSize++;
+      tooLarge = true;
+    }
+    if (cellSize > blockSize) {
+      cellsLargerThanOneBlock++;
+      tooLarge = true;
+    }
+
+    if (tooLarge && LOG.isDebugEnabled()) {
+      LOG.debug("CellTooLarge: size={}, blockSize={}, maxCacheSize={}, 
table={}, cell={}", cellSize,
+        blockSize, maxCacheSize, table, CellUtil.toString(cell, false));
+    }
+
+    if (cellSize > largestCellBytes) {
+      largestCellRef = cell;
+      largestCellBytes = cellSize;
+    }
+    valueSizeBuckets.add(cell.getValueLength());
+
+    totalCells++;
+    if (CellUtil.isDelete(cell)) {
+      totalDeletes++;
+    }
+    totalBytes += cellSize;
+  }
+
+  /**
+   * Clone the cell refs so they can be cleaned up by {@link 
Shipper#shipped()}. Doing this lazily

Review Comment:
   Hmm gotta think about this one



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to