[
https://issues.apache.org/jira/browse/HIVE-25204?focusedWorklogId=611172&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-611172
]
ASF GitHub Bot logged work on HIVE-25204:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 15/Jun/21 07:35
Start Date: 15/Jun/21 07:35
Worklog Time Spent: 10m
Work Description: maheshk114 commented on a change in pull request #2365:
URL: https://github.com/apache/hive/pull/2365#discussion_r651526249
##########
File path:
hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java
##########
@@ -1194,108 +1205,82 @@ static String quoteString(String input) {
private void addNotificationLog(NotificationEvent event, ListenerEvent
listenerEvent, Connection dbConn,
SQLGenerator sqlGenerator) throws
MetaException, SQLException {
LOG.debug("DbNotificationListener: adding notification log for : {}",
event.getMessage());
+ addNotificationLogBatch(Collections.singletonList(event),
Collections.singletonList(listenerEvent),
+ dbConn, sqlGenerator);
+ }
+
+ private void addNotificationLogBatch(List<NotificationEvent> eventList,
List<ListenerEvent> listenerEventList,
+ Connection dbConn, SQLGenerator sqlGenerator)
throws MetaException, SQLException {
if ((dbConn == null) || (sqlGenerator == null)) {
LOG.info("connection or sql generator is not set so executing sql via
DN");
- process(event, listenerEvent);
+ for (int idx = 0; idx < eventList.size(); idx++) {
+ LOG.debug("DbNotificationListener: adding notification log for : {}",
eventList.get(idx).getMessage());
+ process(eventList.get(idx), listenerEventList.get(idx));
+ }
return;
}
- Statement stmt = null;
- PreparedStatement pst = null;
- ResultSet rs = null;
- try {
- stmt = dbConn.createStatement();
- event.setMessageFormat(msgEncoder.getMessageFormat());
+ try (Statement stmt = dbConn.createStatement()) {
if (sqlGenerator.getDbProduct().isMYSQL()) {
stmt.execute("SET @@session.sql_mode=ANSI_QUOTES");
}
+ }
- long nextEventId = getNextEventId(dbConn, sqlGenerator);
-
- long nextNLId = getNextNLId(dbConn, sqlGenerator,
- "org.apache.hadoop.hive.metastore.model.MNotificationLog");
-
- String insertVal;
- String columns;
- List<String> params = new ArrayList<String>();
-
- // Construct the values string, parameters and column string step by
step simultaneously so
- // that the positions of columns and of their corresponding values do
not go out of sync.
-
- // Notification log id
- columns = "\"NL_ID\"";
- insertVal = "" + nextNLId;
-
- // Event id
- columns = columns + ", \"EVENT_ID\"";
- insertVal = insertVal + "," + nextEventId;
-
- // Event time
- columns = columns + ", \"EVENT_TIME\"";
- insertVal = insertVal + "," + event.getEventTime();
+ long nextEventId = getNextEventId(dbConn, sqlGenerator, eventList.size());
+ long nextNLId = getNextNLId(dbConn, sqlGenerator,
+ "org.apache.hadoop.hive.metastore.model.MNotificationLog",
eventList.size());
- // Event type
- columns = columns + ", \"EVENT_TYPE\"";
- insertVal = insertVal + ", ?";
- params.add(event.getEventType());
+ String columns = "\"NL_ID\"" + ", \"EVENT_ID\"" + ", \"EVENT_TIME\"" + ",
\"EVENT_TYPE\"" + ", \"MESSAGE\""
+ + ", \"MESSAGE_FORMAT\"" + ", \"DB_NAME\"" + ", \"TBL_NAME\"" + ",
\"CAT_NAME\"";
+ String insertVal = "insert into \"NOTIFICATION_LOG\" (" + columns + ")
VALUES ("
+ + "?,?,?,?,?,?,?,?,?"
+ + ")";
- // Message
- columns = columns + ", \"MESSAGE\"";
- insertVal = insertVal + ", ?";
- params.add(event.getMessage());
+ try (PreparedStatement pst = dbConn.prepareStatement(insertVal)) {
+ int numRows = 0;
- // Message format
- columns = columns + ", \"MESSAGE_FORMAT\"";
- insertVal = insertVal + ", ?";
- params.add(event.getMessageFormat());
+ for (int idx = 0; idx < eventList.size(); idx++) {
+ NotificationEvent event = eventList.get(idx);
+ ListenerEvent listenerEvent = listenerEventList.get(idx);
- // Database name, optional
- String dbName = event.getDbName();
- if (dbName != null) {
- assert dbName.equals(dbName.toLowerCase());
- columns = columns + ", \"DB_NAME\"";
- insertVal = insertVal + ", ?";
- params.add(dbName);
- }
+ LOG.debug("DbNotificationListener: adding notification log for : {}",
event.getMessage());
+ event.setMessageFormat(msgEncoder.getMessageFormat());
- // Table name, optional
- String tableName = event.getTableName();
- if (tableName != null) {
- assert tableName.equals(tableName.toLowerCase());
- columns = columns + ", \"TBL_NAME\"";
- insertVal = insertVal + ", ?";
- params.add(tableName);
- }
+ pst.setLong(1, nextNLId);
+ pst.setLong(2, nextEventId);
+ pst.setLong(3, now());
+ pst.setString(4, event.getEventType());
+ pst.setString(5, event.getMessage());
+ pst.setString(6, event.getMessageFormat());
+ pst.setString(7, event.getDbName());
+ pst.setString(8, event.getTableName());
+ pst.setString(9, event.getCatName());
+
+ LOG.debug("Going to execute insert <" + insertVal + ">");
+ pst.addBatch();
+ numRows++;
+
+ if (numRows == maxBatchSize) {
+ pst.executeBatch();
+ numRows = 0;
+ }
- // Catalog name, optional
- String catName = event.getCatName();
- if (catName != null) {
- assert catName.equals(catName.toLowerCase());
- columns = columns + ", \"CAT_NAME\"";
- insertVal = insertVal + ", ?";
- params.add(catName);
+ event.setEventId(nextEventId);
+ // Set the DB_NOTIFICATION_EVENT_ID for future reference by other
listeners.
+ if (event.isSetEventId()) {
+ listenerEvent.putParameter(
+
MetaStoreEventListenerConstants.DB_NOTIFICATION_EVENT_ID_KEY_NAME,
+ Long.toString(event.getEventId()));
+ }
Review comment:
The batch operation and the batch writing on notification logs are done
within a single transaction to avoid inconsistency.
##########
File path:
hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java
##########
@@ -905,25 +909,32 @@ public void
onUpdatePartitionColumnStat(UpdatePartitionColumnStatEvent updatePar
}
@Override
- public void
onUpdatePartitionColumnStatDirectSql(UpdatePartitionColumnStatEvent
updatePartColStatEvent,
- Connection dbConn,
SQLGenerator sqlGenerator)
+ public void
onUpdatePartitionColumnStatInBatch(UpdatePartitionColumnStatEventBatch
updatePartColStatEventBatch,
Review comment:
updatePartitionColStatsInBatch internally uses the
updatePartitionColStatsForOneBatch method. updatePartitionColStatsForOneBatch
calls the direct SQL method for one batch, which in turn writes the
notification log.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 611172)
Time Spent: 40m (was: 0.5h)
> Reduce overhead of adding notification log for update partition column
> statistics
> ---------------------------------------------------------------------------------
>
> Key: HIVE-25204
> URL: https://issues.apache.org/jira/browse/HIVE-25204
> Project: Hive
> Issue Type: Sub-task
> Components: Hive, HiveServer2
> Reporter: mahesh kumar behera
> Assignee: mahesh kumar behera
> Priority: Major
> Labels: perfomance, pull-request-available
> Time Spent: 40m
> Remaining Estimate: 0h
>
> The notification logs for partition column statistics can be optimised by
> adding them in batches. In the current implementation, they are added one by
> one, causing multiple SQL executions in the backend RDBMS. These SQL
> executions can be batched to reduce the execution time.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)