virajjasani commented on a change in pull request #1681:
URL: https://github.com/apache/hbase/pull/1681#discussion_r426177730



##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/slowlog/SlowLogRecorder.java
##########
@@ -86,7 +87,9 @@ public SlowLogRecorder(Configuration conf) {
     this.disruptor.setDefaultExceptionHandler(new DisruptorExceptionHandler());
 
     // initialize ringbuffer event handler
-    this.logEventHandler = new LogEventHandler(this.eventCount);
+    final boolean isSlowLogTableEnabled = 
conf.getBoolean(HConstants.SLOW_LOG_SYS_TABLE_ENABLED_KEY,

Review comment:
       Yes, we will create that handler because it is primarily used for the 
in-memory ring buffer. That part is fine, nothing to worry about 👍 

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/slowlog/SlowLogTableAccessor.java
##########
@@ -0,0 +1,140 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver.slowlog;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.TooSlowLog;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Slowlog Accessor to record slow/large RPC log identified at each 
RegionServer RpcServer level.
+ * This can be done only optionally to record the entire history of slow/large 
rpc calls
+ * since RingBuffer can handle only limited latest records.
+ */
+@InterfaceAudience.Private
+public class SlowLogTableAccessor {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(SlowLogTableAccessor.class);
+
+  private static final Random RANDOM = new Random();
+
+  private static Connection connection;
+
+  private static void doPut(final Connection connection, final List<Put> puts)
+      throws IOException {
+    try (Table table = connection.getTable(TableName.SLOW_LOG_TABLE_NAME)) {
+      table.put(puts);
+    }
+  }
+
+  /**
+   * Add slow/large log records to hbase:slowlog table
+   * @param slowLogPayloads List of SlowLogPayload to process
+   * @param configuration Configuration to use for connection
+   */
+  public static void addSlowLogRecords(final List<TooSlowLog.SlowLogPayload> 
slowLogPayloads,
+      final Configuration configuration) {
+    List<Put> puts = new ArrayList<>(slowLogPayloads.size());
+    for (TooSlowLog.SlowLogPayload slowLogPayload : slowLogPayloads) {
+      final byte[] rowKey = getRowKey(slowLogPayload);
+      final Put put = new Put(rowKey).setDurability(Durability.SKIP_WAL)
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("call_details"),
+          Bytes.toBytes(slowLogPayload.getCallDetails()))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("client_address"),
+          Bytes.toBytes(slowLogPayload.getClientAddress()))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("method_name"),
+          Bytes.toBytes(slowLogPayload.getMethodName()))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("param"),
+          Bytes.toBytes(slowLogPayload.getParam()))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("processing_time"),
+          Bytes.toBytes(Integer.toString(slowLogPayload.getProcessingTime())))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("queue_time"),
+          Bytes.toBytes(Integer.toString(slowLogPayload.getQueueTime())))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("region_name"),
+          Bytes.toBytes(slowLogPayload.getRegionName()))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("response_size"),
+          Bytes.toBytes(Long.toString(slowLogPayload.getResponseSize())))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("server_class"),
+          Bytes.toBytes(slowLogPayload.getServerClass()))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("start_time"),
+          Bytes.toBytes(Long.toString(slowLogPayload.getStartTime())))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("type"),
+          Bytes.toBytes(slowLogPayload.getType().name()))
+        .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("username"),
+          Bytes.toBytes(slowLogPayload.getUserName()));
+      puts.add(put);
+    }
+    try {
+      if (connection == null) {
+        synchronized (SlowLogTableAccessor.class) {
+          if (connection == null) {
+            Configuration conf = new Configuration(configuration);
+            // rpc timeout: 20s
+            conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 20000);
+            // retry count: 5
+            conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+            conf.setInt(HConstants.HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER, 
1);
+            connection = ConnectionFactory.createConnection(conf);
+          }
+        }
+      }
+      doPut(connection, puts);

Review comment:
       Sure, that should be fine. But since we already have WAL disabled + 
block cache disabled, do you think it is fine to not touch the RPC handler level 
priority? We already don't have tight consistency and resiliency for this 
system table, so there is already a chance of losing some data :) 
   Thoughts? I am fine with lowering the priority anyway.

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/slowlog/SlowLogTableAccessor.java
##########
@@ -0,0 +1,120 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver.slowlog;
+
+import java.io.IOException;
+import java.util.Random;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.TooSlowLog;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Slowlog Accessor to record slow/large RPC log identified at each 
RegionServer RpcServer level.
+ * This can be done only optionally to record the entire history of slow/large 
rpc calls
+ * since RingBuffer can handle only limited latest records.
+ */
+@InterfaceAudience.Private
+public class SlowLogTableAccessor {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(SlowLogTableAccessor.class);
+
+  private static final Random RANDOM = new Random();
+
+  private static void doPut(final Connection connection, final Put put)
+      throws IOException {
+    try (Table table = connection.getTable(TableName.SLOW_LOG_TABLE_NAME)) {
+      table.put(put);
+    }
+  }
+
+  /**
+   * Add slow/large log records to hbase:slowlog table
+   *
+   * @param slowLogPayload SlowLogPayload to process
+   * @param connection Connection to put data
+   */
+  public static void addSlowLogRecord(final TooSlowLog.SlowLogPayload 
slowLogPayload,
+      final Connection connection) {
+    final byte[] rowKey = getRowKey(slowLogPayload);
+    final Put put = new Put(rowKey)
+      .setDurability(Durability.SKIP_WAL)
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("call_details"),
+        Bytes.toBytes(slowLogPayload.getCallDetails()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("client_address"),
+        Bytes.toBytes(slowLogPayload.getClientAddress()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("method_name"),
+        Bytes.toBytes(slowLogPayload.getMethodName()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("param"),
+        Bytes.toBytes(slowLogPayload.getParam()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("processing_time"),
+        Bytes.toBytes(slowLogPayload.getProcessingTime()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("queue_time"),
+        Bytes.toBytes(slowLogPayload.getQueueTime()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("region_name"),
+        Bytes.toBytes(slowLogPayload.getRegionName()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, 
Bytes.toBytes("response_size"),
+        Bytes.toBytes(slowLogPayload.getResponseSize()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("server_class"),
+        Bytes.toBytes(slowLogPayload.getServerClass()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("start_time"),
+        Bytes.toBytes(slowLogPayload.getStartTime()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("type"),
+        Bytes.toBytes(slowLogPayload.getType().name()))
+      .addColumn(HConstants.SLOWLOG_INFO_FAMILY, Bytes.toBytes("username"),
+        Bytes.toBytes(slowLogPayload.getUserName()));
+    try {
+      doPut(connection, put);
+    } catch (IOException e) {
+      LOG.error("Failed to add slow/large log record to hbase:slowlog table 
for region: {}",
+        slowLogPayload.getRegionName(), e);
+    }
+  }
+
+  /**
+   * Create rowKey: currentTimeMillis APPEND slowLogPayload.hashcode
+   * Scan on slowlog table should keep records with sorted order of time, 
however records
+   * added at the very same time (currentTimeMillis) could be in random order.
+   *
+   * @param slowLogPayload SlowLogPayload to process
+   * @return rowKey byte[]
+   */
+  private static byte[] getRowKey(final TooSlowLog.SlowLogPayload 
slowLogPayload) {

Review comment:
       `DEFAULT_SLOW_LOG_RING_BUFFER_SIZE = 256` is for the in-memory ring 
buffer only. For the 10 min window, yes, we will keep records in memory until 
the cron can insert them together. The size of that in-memory queue is 1000. The 
code is here:
   ```
       if (isSlowLogTableEnabled) {
         EvictingQueue<SlowLogPayload> evictingQueueForTable = 
EvictingQueue.create(
           SYS_TABLE_QUEUE_SIZE);
         queueForSysTable = Queues.synchronizedQueue(evictingQueueForTable);
       } else {
         queueForSysTable = null;
       }
   ```
   
   > this system table if not coming online due to some assignment issues - we 
don't bother right?
   
   That is true, and yes, a warning should be logged, which is done here:
   
   ```
       } catch (Exception e) {
         LOG.warn("Failed to add slow/large log records to hbase:slowlog 
table.", e);
       }
   ```
   
   Retries and timeout are also defined here:
   ```
         Configuration conf = new Configuration(configuration);
         // rpc timeout: 20s
         conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 20000);
         // retry count: 5
         conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
         conf.setInt(HConstants.HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER, 1);
         connection = ConnectionFactory.createConnection(conf);
   ```

##########
File path: hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
##########
@@ -1532,6 +1532,16 @@
     "hbase.regionserver.slowlog.buffer.enabled";
   public static final boolean DEFAULT_ONLINE_LOG_PROVIDER_ENABLED = false;
 
+  /** The slowlog info family as a string*/
+  private static final String SLOWLOG_INFO_FAMILY_STR = "info";
+
+  /** The slowlog info family */
+  public static final byte [] SLOWLOG_INFO_FAMILY = 
Bytes.toBytes(SLOWLOG_INFO_FAMILY_STR);
+
+  public static final String SLOW_LOG_SYS_TABLE_ENABLED_KEY =
+    "hbase.regionserver.slowlog.systable.enabled";
+  public static final boolean DEFAULT_SLOW_LOG_SYS_TABLE_ENABLED_KEY = false;

Review comment:
       At a high level, it will be redundant, but if we are looking for 
completeness of logs, this might provide better details, because as of today 
we log truncated data at the RpcServer level. At times, we miss a long region name.
   ```
   e.g
   "param":"region { type: REGION_NAME value: 
\"t1,\\000\\000\\215\\f)o\\\\\\024\\302\\220\\000\\000\\000\\000\\000\\001\\000\\000\\000\\000\\000\\006\\000\\000\\000\\000\\000\\005
   000000<TRUNCATED>"
   ```
   
   But regardless, this is definitely not supposed to be enabled by default. 
The ring buffer feature is also disabled by default so far. Maybe we can plan to 
enable it by default starting with release 3.0.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to