[ 
https://issues.apache.org/jira/browse/HIVE-22954?focusedWorklogId=398272&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-398272
 ]

ASF GitHub Bot logged work on HIVE-22954:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 05/Mar/20 10:23
            Start Date: 05/Mar/20 10:23
    Worklog Time Spent: 10m 
      Work Description: anishek commented on pull request #932: HIVE-22954 Repl 
Load using scheduler
URL: https://github.com/apache/hive/pull/932#discussion_r388186231
 
 

 ##########
 File path: 
ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
 ##########
 @@ -387,27 +388,80 @@ private void analyzeReplLoad(ASTNode ast) throws 
SemanticException {
       // At this point, all dump dirs should contain a _dumpmetadata file that
       // tells us what is inside that dumpdir.
 
-      DumpMetaData dmd = new DumpMetaData(loadPath, conf);
-
-      boolean evDump = false;
-      // we will decide what hdfs locations needs to be copied over here as 
well.
-      if (dmd.isIncrementalDump()) {
-        LOG.debug("{} contains an incremental dump", loadPath);
-        evDump = true;
+      //If repl status of target is greater than dumps, don't do anything as 
the load for the latest dump is done
+      if (isTargetAlreadyLoaded) {
+        return;
       } else {
-        LOG.debug("{} contains an bootstrap dump", loadPath);
+        DumpMetaData dmd = new DumpMetaData(loadPath, conf);
+
+        boolean evDump = false;
+        // we will decide what hdfs locations needs to be copied over here as 
well.
+        if (dmd.isIncrementalDump()) {
+          LOG.debug("{} contains an incremental dump", loadPath);
+          evDump = true;
+        } else {
+          LOG.debug("{} contains an bootstrap dump", loadPath);
+        }
+        ReplLoadWork replLoadWork = new ReplLoadWork(conf, 
loadPath.toString(), replScope.getDbName(),
+                dmd.getReplScope(),
+                queryState.getLineageState(), evDump, dmd.getEventTo(),
+                dirLocationsToCopy(loadPath, evDump));
+        rootTasks.add(TaskFactory.get(replLoadWork, conf));
       }
-      ReplLoadWork replLoadWork = new ReplLoadWork(conf, loadPath.toString(), 
replScope.getDbName(),
-              dmd.getReplScope(),
-              queryState.getLineageState(), evDump, dmd.getEventTo(),
-          dirLocationsToCopy(loadPath, evDump));
-      rootTasks.add(TaskFactory.get(replLoadWork, conf));
     } catch (Exception e) {
       // TODO : simple wrap & rethrow for now, clean up with error codes
       throw new SemanticException(e.getMessage(), e);
     }
   }
 
+  private Path getCurrentLoadPath() throws IOException, SemanticException {
+    Path loadPathBase = new Path(conf.getVar(HiveConf.ConfVars.REPLDIR),
+            
Base64.getEncoder().encodeToString(sourceDbNameOrPattern.toLowerCase()
+            .getBytes(StandardCharsets.UTF_8.name())));
+    final FileSystem fs = loadPathBase.getFileSystem(conf);
+
+    // Make fully qualified path for further use.
+    loadPathBase = fs.makeQualified(loadPathBase);
+
+    if (!fs.exists(loadPathBase)) {
+      // supposed dump path does not exist.
+      LOG.error("File not found " + loadPathBase.toUri().toString());
+      throw new 
FileNotFoundException(ErrorMsg.REPL_LOAD_PATH_NOT_FOUND.getMsg());
+    }
+    FileStatus[] statuses = 
loadPathBase.getFileSystem(conf).listStatus(loadPathBase);
+    if (statuses.length > 0) {
+      //sort based on last modified. Recent one is at the end
+      Arrays.sort(statuses, new Comparator<FileStatus>() {
+        public int compare(FileStatus f1, FileStatus f2) {
+          return Long.compare(f1.getModificationTime(), 
f2.getModificationTime());
+        }
+      });
+      if (replScope.getDbName() != null) {
+        String currentReplStatusOfTarget
+                = getReplStatus(replScope.getDbName());
+        if (currentReplStatusOfTarget == null) { //bootstrap
+          return statuses[0].getPath();
+        } else {
+          DumpMetaData latestDump = new DumpMetaData(statuses[statuses.length 
- 1].getPath(), conf);
+          if (Long.parseLong(currentReplStatusOfTarget) >= 
latestDump.getEventTo().longValue()) {
+            isTargetAlreadyLoaded = true;
+          } else {
+            for (FileStatus status : statuses) {
+              DumpMetaData dmd = new DumpMetaData(status.getPath(), conf);
+              if (dmd.isIncrementalDump()
+                      && Long.parseLong(currentReplStatusOfTarget) < 
dmd.getEventTo().longValue()) {
+                return status.getPath();
+              }
+            }
+          }
+        }
+      }
+      //If dbname is null(in case of repl load *), can't get repl status of 
target, return the latest dump
+      return statuses[statuses.length - 1].getPath();
 
 Review comment:
   This use case is not supported; it would be better to throw 
UnsupportedOperationException here.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 398272)
    Time Spent: 20m  (was: 10m)

> Schedule Repl Load using Hive Scheduler
> ---------------------------------------
>
>                 Key: HIVE-22954
>                 URL: https://issues.apache.org/jira/browse/HIVE-22954
>             Project: Hive
>          Issue Type: Task
>            Reporter: Aasha Medhi
>            Assignee: Aasha Medhi
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: HIVE-22954.01.patch, HIVE-22954.02.patch, 
> HIVE-22954.03.patch, HIVE-22954.04.patch, HIVE-22954.05.patch, 
> HIVE-22954.06.patch, HIVE-22954.07.patch, HIVE-22954.08.patch, 
> HIVE-22954.09.patch, HIVE-22954.10.patch, HIVE-22954.11.patch, 
> HIVE-22954.12.patch, HIVE-22954.13.patch, HIVE-22954.15.patch, 
> HIVE-22954.16.patch, HIVE-22954.17.patch, HIVE-22954.patch
>
>          Time Spent: 20m
>  Remaining Estimate: 0h
>
> [https://github.com/apache/hive/pull/932]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to