[ 
https://issues.apache.org/jira/browse/HIVE-22954?focusedWorklogId=398305&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-398305
 ]

ASF GitHub Bot logged work on HIVE-22954:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 05/Mar/20 10:58
            Start Date: 05/Mar/20 10:58
    Worklog Time Spent: 10m 
      Work Description: aasha commented on pull request #932: HIVE-22954 Repl 
Load using scheduler
URL: https://github.com/apache/hive/pull/932#discussion_r388220832
 
 

 ##########
 File path: 
ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
 ##########
 @@ -387,27 +388,80 @@ private void analyzeReplLoad(ASTNode ast) throws 
SemanticException {
       // At this point, all dump dirs should contain a _dumpmetadata file that
       // tells us what is inside that dumpdir.
 
-      DumpMetaData dmd = new DumpMetaData(loadPath, conf);
-
-      boolean evDump = false;
-      // we will decide what hdfs locations needs to be copied over here as 
well.
-      if (dmd.isIncrementalDump()) {
-        LOG.debug("{} contains an incremental dump", loadPath);
-        evDump = true;
+      //If repl status of target is greater than dumps, don't do anything as 
the load for the latest dump is done
+      if (isTargetAlreadyLoaded) {
+        return;
       } else {
-        LOG.debug("{} contains an bootstrap dump", loadPath);
+        DumpMetaData dmd = new DumpMetaData(loadPath, conf);
+
+        boolean evDump = false;
+        // we will decide what hdfs locations needs to be copied over here as 
well.
+        if (dmd.isIncrementalDump()) {
+          LOG.debug("{} contains an incremental dump", loadPath);
+          evDump = true;
+        } else {
+          LOG.debug("{} contains an bootstrap dump", loadPath);
+        }
+        ReplLoadWork replLoadWork = new ReplLoadWork(conf, 
loadPath.toString(), replScope.getDbName(),
+                dmd.getReplScope(),
+                queryState.getLineageState(), evDump, dmd.getEventTo(),
+                dirLocationsToCopy(loadPath, evDump));
+        rootTasks.add(TaskFactory.get(replLoadWork, conf));
       }
-      ReplLoadWork replLoadWork = new ReplLoadWork(conf, loadPath.toString(), 
replScope.getDbName(),
-              dmd.getReplScope(),
-              queryState.getLineageState(), evDump, dmd.getEventTo(),
-          dirLocationsToCopy(loadPath, evDump));
-      rootTasks.add(TaskFactory.get(replLoadWork, conf));
     } catch (Exception e) {
       // TODO : simple wrap & rethrow for now, clean up with error codes
       throw new SemanticException(e.getMessage(), e);
     }
   }
 
+  private Path getCurrentLoadPath() throws IOException, SemanticException {
+    Path loadPathBase = new Path(conf.getVar(HiveConf.ConfVars.REPLDIR),
+            
Base64.getEncoder().encodeToString(sourceDbNameOrPattern.toLowerCase()
+            .getBytes(StandardCharsets.UTF_8.name())));
+    final FileSystem fs = loadPathBase.getFileSystem(conf);
+
+    // Make fully qualified path for further use.
+    loadPathBase = fs.makeQualified(loadPathBase);
+
+    if (!fs.exists(loadPathBase)) {
+      // supposed dump path does not exist.
+      LOG.error("File not found " + loadPathBase.toUri().toString());
+      throw new 
FileNotFoundException(ErrorMsg.REPL_LOAD_PATH_NOT_FOUND.getMsg());
+    }
+    FileStatus[] statuses = 
loadPathBase.getFileSystem(conf).listStatus(loadPathBase);
+    if (statuses.length > 0) {
+      //sort based on last modified. Recent one is at the end
+      Arrays.sort(statuses, new Comparator<FileStatus>() {
+        public int compare(FileStatus f1, FileStatus f2) {
+          return Long.compare(f1.getModificationTime(), 
f2.getModificationTime());
+        }
+      });
+      if (replScope.getDbName() != null) {
+        String currentReplStatusOfTarget
+                = getReplStatus(replScope.getDbName());
+        if (currentReplStatusOfTarget == null) { //bootstrap
+          return statuses[0].getPath();
+        } else {
+          DumpMetaData latestDump = new DumpMetaData(statuses[statuses.length 
- 1].getPath(), conf);
+          if (Long.parseLong(currentReplStatusOfTarget) >= 
latestDump.getEventTo().longValue()) {
+            isTargetAlreadyLoaded = true;
+          } else {
+            for (FileStatus status : statuses) {
+              DumpMetaData dmd = new DumpMetaData(status.getPath(), conf);
+              if (dmd.isIncrementalDump()
+                      && Long.parseLong(currentReplStatusOfTarget) < 
dmd.getEventTo().longValue()) {
 
 Review comment:
   There are test cases where the incremental dump covers events from, say, 3 to 15 
while the current status of the target is at 10 because of a previous partial incremental 
load.
   For example: 
TestReplicationWithTableMigration#testIncrementalLoadMigrationManagedToAcidFailurePart
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 398305)
    Time Spent: 50m  (was: 40m)

> Schedule Repl Load using Hive Scheduler
> ---------------------------------------
>
>                 Key: HIVE-22954
>                 URL: https://issues.apache.org/jira/browse/HIVE-22954
>             Project: Hive
>          Issue Type: Task
>            Reporter: Aasha Medhi
>            Assignee: Aasha Medhi
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: HIVE-22954.01.patch, HIVE-22954.02.patch, 
> HIVE-22954.03.patch, HIVE-22954.04.patch, HIVE-22954.05.patch, 
> HIVE-22954.06.patch, HIVE-22954.07.patch, HIVE-22954.08.patch, 
> HIVE-22954.09.patch, HIVE-22954.10.patch, HIVE-22954.11.patch, 
> HIVE-22954.12.patch, HIVE-22954.13.patch, HIVE-22954.15.patch, 
> HIVE-22954.16.patch, HIVE-22954.17.patch, HIVE-22954.patch
>
>          Time Spent: 50m
>  Remaining Estimate: 0h
>
> [https://github.com/apache/hive/pull/932]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to