[
https://issues.apache.org/jira/browse/HIVE-24895?focusedWorklogId=574885&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-574885
]
ASF GitHub Bot logged work on HIVE-24895:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 31/Mar/21 14:37
Start Date: 31/Mar/21 14:37
Worklog Time Spent: 10m
Work Description: pkumarsinha commented on a change in pull request #2083:
URL: https://github.com/apache/hive/pull/2083#discussion_r604947354
##########
File path:
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
##########
@@ -1606,4 +1613,78 @@ public void testDatabaseLevelCopyDisabled() throws
Throwable {
ReplicationTestUtils.assertExternalFileList(Arrays
.asList("a", "b", "c", "newin", "newout"), tuple.dumpLocation,
primary);
}
+
+ @Test
+ public void testDataCopyEndLogAtSource() throws Throwable {
+ testDataCopyEndLog(false);
+ }
+
+ @Test
+ public void testDataCopyEndLogAtTarget() throws Throwable {
+ testDataCopyEndLog(true);
+ }
+
+ public void testDataCopyEndLog(boolean runCopyTasksOnTarget) throws
Throwable {
+ // Get the logger at the root level.
+ Logger logger = LogManager.getLogger("hive.ql.metadata.Hive");
+ LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
+ Configuration config = ctx.getConfiguration();
+ LoggerConfig loggerConfig = config.getLoggerConfig(logger.getName());
+ loggerConfig.setLevel(Level.DEBUG);
+ ctx.updateLoggers();
+ // Create a String Appender to capture log output
+
+ StringAppender appender = StringAppender.createStringAppender("%m");
+ appender.addToLogger(logger.getName(), Level.DEBUG);
+ appender.start();
+
+ List<String> withClause = Arrays.asList("'distcp.options.update'=''",
+ "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname
+ + "'='" + runCopyTasksOnTarget + "'");
+
+ // Perform bootstrap dump & load.
+ primary.run("use " + primaryDbName)
+ .run("create external table a (i int)")
+ .run("insert into a values (1),(2),(3),(4)")
+ .run("create external table b (i int)")
+ .run("insert into b values (5),(6),(7),(8)")
+ .dump(primaryDbName, withClause);
+
+ replica.load(replicatedDbName, primaryDbName, withClause)
+ .run("use " + replicatedDbName)
+ .run("show tables like 'a'")
+ .verifyResults(Collections.singletonList("a"))
+ .run("show tables like 'b'")
+ .verifyResults(Collections.singletonList("b"));
+
+ // Check whether the log contains DATA_COPY_END
+ String logStr = appender.getOutput();
+ assertTrue(logStr, logStr.contains("REPL::DATA_COPY_END:"));
+ logStr.indexOf("REPL::DATA_COPY_END:");
+ // Check the log contains DATA_COPY_END after Distcp
+ assertTrue(logStr, logStr.contains("DistCpOptions"));
+ String postLog = logStr.substring(logStr.indexOf("REPL::DATA_COPY_END:"));
+
+ assertFalse(postLog, postLog.contains("DistCpOptions"));
+ appender.reset();
+
+ // Perform incremental dump & load.
+ primary.run("create table c (i int)")
+ .run("insert into c values (10),(11)")
+ .run("insert into a values (5),(6)")
+ .run("insert into b values (9),(10)").dump(primaryDbName, withClause);
+
+ replica.load(replicatedDbName, primaryDbName, withClause)
+ .run("select i From c")
+ .verifyResults(new String[] {"10", "11"});
+
+ // Check whether the log contains DATA_COPY_END
+ logStr = appender.getOutput();
+ assertTrue(logStr, logStr.contains("REPL::DATA_COPY_END:"));
+ logStr.indexOf("REPL::DATA_COPY_END:");
Review comment:
I didn't get the intent of the line above: the result of this `indexOf` call is discarded. Is it required?
##########
File path:
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
##########
@@ -1606,4 +1613,78 @@ public void testDatabaseLevelCopyDisabled() throws
Throwable {
ReplicationTestUtils.assertExternalFileList(Arrays
.asList("a", "b", "c", "newin", "newout"), tuple.dumpLocation,
primary);
}
+
+ @Test
+ public void testDataCopyEndLogAtSource() throws Throwable {
+ testDataCopyEndLog(false);
+ }
+
+ @Test
+ public void testDataCopyEndLogAtTarget() throws Throwable {
+ testDataCopyEndLog(true);
+ }
+
+ public void testDataCopyEndLog(boolean runCopyTasksOnTarget) throws
Throwable {
+ // Get the logger at the root level.
+ Logger logger = LogManager.getLogger("hive.ql.metadata.Hive");
+ LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
+ Configuration config = ctx.getConfiguration();
+ LoggerConfig loggerConfig = config.getLoggerConfig(logger.getName());
+ loggerConfig.setLevel(Level.DEBUG);
+ ctx.updateLoggers();
+ // Create a String Appender to capture log output
+
+ StringAppender appender = StringAppender.createStringAppender("%m");
+ appender.addToLogger(logger.getName(), Level.DEBUG);
+ appender.start();
+
+ List<String> withClause = Arrays.asList("'distcp.options.update'=''",
+ "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname
+ + "'='" + runCopyTasksOnTarget + "'");
+
+ // Perform bootstrap dump & load.
+ primary.run("use " + primaryDbName)
+ .run("create external table a (i int)")
+ .run("insert into a values (1),(2),(3),(4)")
+ .run("create external table b (i int)")
+ .run("insert into b values (5),(6),(7),(8)")
+ .dump(primaryDbName, withClause);
+
+ replica.load(replicatedDbName, primaryDbName, withClause)
+ .run("use " + replicatedDbName)
+ .run("show tables like 'a'")
+ .verifyResults(Collections.singletonList("a"))
+ .run("show tables like 'b'")
+ .verifyResults(Collections.singletonList("b"));
+
+ // Check whether the log contains DATA_COPY_END
+ String logStr = appender.getOutput();
+ assertTrue(logStr, logStr.contains("REPL::DATA_COPY_END:"));
+ logStr.indexOf("REPL::DATA_COPY_END:");
+ // Check the log contains DATA_COPY_END after Distcp
+ assertTrue(logStr, logStr.contains("DistCpOptions"));
+ String postLog = logStr.substring(logStr.indexOf("REPL::DATA_COPY_END:"));
+
+ assertFalse(postLog, postLog.contains("DistCpOptions"));
+ appender.reset();
+
+ // Perform incremental dump & load.
+ primary.run("create table c (i int)")
+ .run("insert into c values (10),(11)")
+ .run("insert into a values (5),(6)")
+ .run("insert into b values (9),(10)").dump(primaryDbName, withClause);
+
+ replica.load(replicatedDbName, primaryDbName, withClause)
+ .run("select i From c")
+ .verifyResults(new String[] {"10", "11"});
+
+ // Check whether the log contains DATA_COPY_END
+ logStr = appender.getOutput();
+ assertTrue(logStr, logStr.contains("REPL::DATA_COPY_END:"));
+ logStr.indexOf("REPL::DATA_COPY_END:");
+ // Check whether the log contains DATA_COPY_END
+ assertTrue(logStr, logStr.contains("DistCpOptions"));
Review comment:
Actually, we can add an assertion on this:
logStr.indexOf("REPL::DATA_COPY_END:") > logStr.lastIndexOf("Completed DirCopyTask for source")
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 574885)
Time Spent: 1h 10m (was: 1h)
> Add a DataCopyEnd stage in ReplStateLogTask for external table replication
> --------------------------------------------------------------------------
>
> Key: HIVE-24895
> URL: https://issues.apache.org/jira/browse/HIVE-24895
> Project: Hive
> Issue Type: Improvement
> Reporter: Ayush Saxena
> Assignee: Ayush Saxena
> Priority: Major
> Labels: pull-request-available
> Time Spent: 1h 10m
> Remaining Estimate: 0h
>
> Add a task to mark the end of external table copy.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)