This is an automated email from the ASF dual-hosted git repository.

wuzhiguo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/bigtop-manager.git


The following commit(s) were added to refs/heads/main by this push:
     new ae8e43d  BIGTOP-4158: Support service add job retry when failed (#17)
ae8e43d is described below

commit ae8e43d4a37c11068f77c28cba38050bacae2fd8
Author: Zhiguo Wu <[email protected]>
AuthorDate: Mon Jul 15 10:40:58 2024 +0800

    BIGTOP-4158: Support service add job retry when failed (#17)
---
 .../agent/service/CommandServiceGrpcImpl.java      | 20 +++++++++
 .../agent/service/TaskLogServiceGrpcImpl.java      | 22 +---------
 .../bigtop/manager/agent/utils/LogFileUtils.java   | 25 ++++++++---
 .../manager/server/controller/JobController.java   |  7 ++++
 .../manager/server/enums/ApiExceptionEnum.java     |  1 +
 .../bigtop/manager/server/enums/LocaleKeys.java    |  1 +
 .../bigtop/manager/server/service/JobService.java  |  2 +
 .../server/service/impl/JobServiceImpl.java        | 41 ++++++++++++++++++
 .../main/resources/i18n/messages_en_US.properties  |  1 +
 .../main/resources/i18n/messages_zh_CN.properties  |  1 +
 bigtop-manager-ui/src/api/job/index.ts             |  7 ++++
 .../src/components/service-add/install.vue         | 48 +++++++++++++++++++---
 bigtop-manager-ui/src/locales/en_US/common.ts      |  1 +
 bigtop-manager-ui/src/locales/zh_CN/common.ts      |  1 +
 14 files changed, 147 insertions(+), 31 deletions(-)

diff --git a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/CommandServiceGrpcImpl.java b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/CommandServiceGrpcImpl.java
index ed1f829..c9d9368 100644
--- a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/CommandServiceGrpcImpl.java
+++ b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/CommandServiceGrpcImpl.java
@@ -21,6 +21,7 @@ package org.apache.bigtop.manager.agent.service;
 import org.apache.bigtop.manager.agent.cache.Caches;
 import org.apache.bigtop.manager.agent.executor.CommandExecutor;
 import org.apache.bigtop.manager.agent.executor.CommandExecutors;
+import org.apache.bigtop.manager.agent.utils.LogFileUtils;
 import org.apache.bigtop.manager.grpc.generated.CommandReply;
 import org.apache.bigtop.manager.grpc.generated.CommandRequest;
 import org.apache.bigtop.manager.grpc.generated.CommandServiceGrpc;
@@ -32,6 +33,10 @@ import io.grpc.stub.StreamObserver;
 import lombok.extern.slf4j.Slf4j;
 import net.devh.boot.grpc.server.service.GrpcService;
 
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
 @Slf4j
 @GrpcService
 public class CommandServiceGrpcImpl extends CommandServiceGrpc.CommandServiceImplBase {
@@ -39,6 +44,9 @@ public class CommandServiceGrpcImpl extends CommandServiceGrpc.CommandServiceImp
     @Override
     public void exec(CommandRequest request, StreamObserver<CommandReply> responseObserver) {
         try {
+            // Truncate old logs if exists, only useful when it's retry command
+            truncateLogFile(request.getTaskId());
+
             MDC.put("taskId", String.valueOf(request.getTaskId()));
             Caches.RUNNING_TASKS.add(request.getTaskId());
             CommandExecutor commandExecutor = CommandExecutors.getCommandExecutor(request.getType());
@@ -54,4 +62,16 @@ public class CommandServiceGrpcImpl extends CommandServiceGrpc.CommandServiceImp
             MDC.clear();
         }
     }
+
+    private void truncateLogFile(Long taskId) {
+        String filePath = LogFileUtils.getLogFilePath(taskId);
+        File file = new File(filePath);
+        if (file.exists()) {
+            try (RandomAccessFile rf = new RandomAccessFile(file, "rw")) {
+                rf.setLength(0);
+            } catch (IOException e) {
+                log.warn("Error when truncate file: {}", filePath, e);
+            }
+        }
+    }
 }
diff --git a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/TaskLogServiceGrpcImpl.java b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/TaskLogServiceGrpcImpl.java
index 018ad0a..781cc3f 100644
--- a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/TaskLogServiceGrpcImpl.java
+++ b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/TaskLogServiceGrpcImpl.java
@@ -19,18 +19,16 @@
 package org.apache.bigtop.manager.agent.service;
 
 import org.apache.bigtop.manager.agent.cache.Caches;
+import org.apache.bigtop.manager.agent.utils.LogFileUtils;
 import org.apache.bigtop.manager.grpc.generated.TaskLogReply;
 import org.apache.bigtop.manager.grpc.generated.TaskLogRequest;
 import org.apache.bigtop.manager.grpc.generated.TaskLogServiceGrpc;
 
-import org.apache.commons.lang3.SystemUtils;
-
 import io.grpc.Status;
 import io.grpc.stub.StreamObserver;
 import lombok.extern.slf4j.Slf4j;
 import net.devh.boot.grpc.server.service.GrpcService;
 
-import java.io.File;
 import java.io.RandomAccessFile;
 import java.nio.charset.StandardCharsets;
 
@@ -40,7 +38,7 @@ public class TaskLogServiceGrpcImpl extends TaskLogServiceGrpc.TaskLogServiceImp
 
     @Override
     public void getLog(TaskLogRequest request, StreamObserver<TaskLogReply> responseObserver) {
-        String path = getLogFilePath(request.getTaskId());
+        String path = LogFileUtils.getLogFilePath(request.getTaskId());
         try (RandomAccessFile file = new RandomAccessFile(path, "r")) {
             // Read from beginning
             long fileLength = file.length();
@@ -86,20 +84,4 @@ public class TaskLogServiceGrpcImpl extends TaskLogServiceGrpc.TaskLogServiceImp
             }
         }
     }
-
-    private String getLogFilePath(Long taskId) {
-        String baseDir;
-        if (SystemUtils.IS_OS_WINDOWS) {
-            baseDir = SystemUtils.getUserDir().getPath();
-        } else {
-            File file = new File(this.getClass()
-                    .getProtectionDomain()
-                    .getCodeSource()
-                    .getLocation()
-                    .getPath());
-            baseDir = file.getParentFile().getParentFile().getPath();
-        }
-
-        return baseDir + File.separator + "tasklogs" + File.separator + "task-" + taskId + ".log";
-    }
 }
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/utils/LogFileUtils.java
similarity index 53%
copy from bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
copy to bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/utils/LogFileUtils.java
index 117292f..e87929a 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
+++ b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/utils/LogFileUtils.java
@@ -16,14 +16,27 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.bigtop.manager.server.service;
+package org.apache.bigtop.manager.agent.utils;
 
-import org.apache.bigtop.manager.server.model.vo.JobVO;
-import org.apache.bigtop.manager.server.model.vo.PageVO;
+import org.apache.commons.lang3.SystemUtils;
 
-public interface JobService {
+import java.io.File;
 
-    PageVO<JobVO> list(Long clusterId);
+public class LogFileUtils {
 
-    JobVO get(Long id);
+    public static String getLogFilePath(Long taskId) {
+        String baseDir;
+        if (SystemUtils.IS_OS_WINDOWS) {
+            baseDir = SystemUtils.getUserDir().getPath();
+        } else {
+            File file = new File(LogFileUtils.class
+                    .getProtectionDomain()
+                    .getCodeSource()
+                    .getLocation()
+                    .getPath());
+            baseDir = file.getParentFile().getParentFile().getPath();
+        }
+
+        return baseDir + File.separator + "tasklogs" + File.separator + "task-" + taskId + ".log";
+    }
 }
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/controller/JobController.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/controller/JobController.java
index 940afd3..97bcc09 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/controller/JobController.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/controller/JobController.java
@@ -25,6 +25,7 @@ import org.apache.bigtop.manager.server.utils.ResponseEntity;
 
 import org.springframework.web.bind.annotation.GetMapping;
 import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.PostMapping;
 import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RestController;
 
@@ -66,4 +67,10 @@ public class JobController {
     public ResponseEntity<JobVO> get(@PathVariable Long id, @PathVariable Long clusterId) {
         return ResponseEntity.success(jobService.get(id));
     }
+
+    @Operation(summary = "retry", description = "Retry a failed job")
+    @PostMapping("/{id}/retry")
+    public ResponseEntity<JobVO> retry(@PathVariable Long id, @PathVariable Long clusterId) {
+        return ResponseEntity.success(jobService.retry(id));
+    }
 }
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/ApiExceptionEnum.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/ApiExceptionEnum.java
index 97c8766..45ea423 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/ApiExceptionEnum.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/ApiExceptionEnum.java
@@ -52,6 +52,7 @@ public enum ApiExceptionEnum {
 
     // Job Exceptions -- 16000 ~ 16999
     JOB_NOT_FOUND(16000, LocaleKeys.JOB_NOT_FOUND),
+    JOB_NOT_RETRYABLE(16001, LocaleKeys.JOB_NOT_RETRYABLE),
 
     // Configuration Exceptions -- 17000 ~ 17999
     CONFIG_NOT_FOUND(17000, LocaleKeys.CONFIG_NOT_FOUND),
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/LocaleKeys.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/LocaleKeys.java
index 2a9ad0f..22dc64a 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/LocaleKeys.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/LocaleKeys.java
@@ -50,6 +50,7 @@ public enum LocaleKeys {
     COMPONENT_NOT_FOUND("component.not.found"),
 
     JOB_NOT_FOUND("job.not.found"),
+    JOB_NOT_RETRYABLE("job.not.retryable"),
 
     CONFIG_NOT_FOUND("config.not.found"),
 
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
index 117292f..5d0e1d8 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
@@ -26,4 +26,6 @@ public interface JobService {
     PageVO<JobVO> list(Long clusterId);
 
     JobVO get(Long id);
+
+    JobVO retry(Long id);
 }
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/impl/JobServiceImpl.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/impl/JobServiceImpl.java
index b4391f7..77e191a 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/impl/JobServiceImpl.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/impl/JobServiceImpl.java
@@ -18,8 +18,16 @@
  */
 package org.apache.bigtop.manager.server.service.impl;
 
+import org.apache.bigtop.manager.common.enums.JobState;
 import org.apache.bigtop.manager.dao.entity.Job;
+import org.apache.bigtop.manager.dao.entity.Stage;
+import org.apache.bigtop.manager.dao.entity.Task;
 import org.apache.bigtop.manager.dao.repository.JobRepository;
+import org.apache.bigtop.manager.dao.repository.StageRepository;
+import org.apache.bigtop.manager.dao.repository.TaskRepository;
+import org.apache.bigtop.manager.server.command.scheduler.JobScheduler;
+import org.apache.bigtop.manager.server.enums.ApiExceptionEnum;
+import org.apache.bigtop.manager.server.exception.ApiException;
 import org.apache.bigtop.manager.server.model.mapper.JobMapper;
 import org.apache.bigtop.manager.server.model.query.PageQuery;
 import org.apache.bigtop.manager.server.model.vo.JobVO;
@@ -41,6 +49,15 @@ public class JobServiceImpl implements JobService {
     @Resource
     private JobRepository jobRepository;
 
+    @Resource
+    private StageRepository stageRepository;
+
+    @Resource
+    private TaskRepository taskRepository;
+
+    @Resource
+    private JobScheduler jobScheduler;
+
     @Override
     public PageVO<JobVO> list(Long clusterId) {
         PageQuery pageQuery = PageUtils.getPageQuery();
@@ -60,4 +77,28 @@ public class JobServiceImpl implements JobService {
         Job job = jobRepository.getReferenceById(id);
         return JobMapper.INSTANCE.fromEntity2VO(job);
     }
+
+    @Override
+    public JobVO retry(Long id) {
+        Job job = jobRepository.getReferenceById(id);
+        if (job.getState() != JobState.FAILED) {
+            throw new ApiException(ApiExceptionEnum.JOB_NOT_RETRYABLE);
+        }
+
+        for (Stage stage : job.getStages()) {
+            for (Task task : stage.getTasks()) {
+                task.setState(JobState.PENDING);
+                taskRepository.save(task);
+            }
+
+            stage.setState(JobState.PENDING);
+            stageRepository.save(stage);
+        }
+
+        job.setState(JobState.PENDING);
+        jobRepository.save(job);
+        jobScheduler.submit(job);
+
+        return JobMapper.INSTANCE.fromEntity2VO(job);
+    }
 }
diff --git a/bigtop-manager-server/src/main/resources/i18n/messages_en_US.properties b/bigtop-manager-server/src/main/resources/i18n/messages_en_US.properties
index 6560c05..5ae40e7 100644
--- a/bigtop-manager-server/src/main/resources/i18n/messages_en_US.properties
+++ b/bigtop-manager-server/src/main/resources/i18n/messages_en_US.properties
@@ -44,6 +44,7 @@ service.required.not.found=Required Service [{0}] not exist
 component.not.found=Component not exist
 
 job.not.found=Job not exist
+job.not.retryable=Job is not retryable when it's not failed
 
 config.not.found=Config not exist
 
diff --git a/bigtop-manager-server/src/main/resources/i18n/messages_zh_CN.properties b/bigtop-manager-server/src/main/resources/i18n/messages_zh_CN.properties
index 60d9ed7..a45ff71 100644
--- a/bigtop-manager-server/src/main/resources/i18n/messages_zh_CN.properties
+++ b/bigtop-manager-server/src/main/resources/i18n/messages_zh_CN.properties
@@ -44,6 +44,7 @@ service.required.not.found=依赖服务 [{0}] 不存在
 component.not.found=组件不存在
 
 job.not.found=任务不存在
+job.not.retryable=任务非失败状态,无法重试
 
 config.not.found=配置不存在
 
diff --git a/bigtop-manager-ui/src/api/job/index.ts b/bigtop-manager-ui/src/api/job/index.ts
index 869d7ed..d32ceb3 100644
--- a/bigtop-manager-ui/src/api/job/index.ts
+++ b/bigtop-manager-ui/src/api/job/index.ts
@@ -27,6 +27,13 @@ export const getJob = (id: number, clusterId: number): Promise<JobVO> => {
   })
 }
 
+export const retryJob = (id: number, clusterId: number): Promise<JobVO> => {
+  return request({
+    method: 'post',
+    url: '/clusters/' + clusterId + '/jobs/' + id + '/retry'
+  })
+}
+
 export const getJobs = (
   clusterId: number,
   pagination: Pagination
diff --git a/bigtop-manager-ui/src/components/service-add/install.vue b/bigtop-manager-ui/src/components/service-add/install.vue
index 99c9849..cfc7cf6 100644
--- a/bigtop-manager-ui/src/components/service-add/install.vue
+++ b/bigtop-manager-ui/src/components/service-add/install.vue
@@ -19,15 +19,16 @@
 
 <script setup lang="ts">
   import { useI18n } from 'vue-i18n'
-  import { getJob } from '@/api/job'
+  import { getJob, retryJob } from '@/api/job'
   import { JOB_SCHEDULE_INTERVAL } from '@/utils/constant.ts'
   import { useIntervalFn } from '@vueuse/core'
-  import { onBeforeMount, onBeforeUnmount, reactive, ref } from 'vue'
+  import { computed, onBeforeMount, onBeforeUnmount, reactive, ref } from 'vue'
   import { useClusterStore } from '@/store/cluster'
   import { storeToRefs } from 'pinia'
   import { JobVO, StageVO } from '@/api/job/types'
   import CustomProgress from '@/components/job-info/custom-progress.vue'
   import Job from '@/components/job-info/job.vue'
+  import { RedoOutlined } from '@ant-design/icons-vue'
 
   const serviceInfo = defineModel<any>('serviceInfo')
   const disableButton = defineModel<boolean>('disableButton')
@@ -43,6 +44,10 @@
   const currStage = ref<StageVO>()
   const installData = reactive([])
 
+  const canRetry = computed(() => {
+    return jobState.value === 'Failed'
+  })
+
   const installColumns = [
     {
       title: t('common.stage'),
@@ -56,6 +61,22 @@
     }
   ]
 
+  const doRetry = async () => {
+    await retryJob(serviceInfo.value.jobId, clusterId.value)
+
+    const { pause } = useIntervalFn(
+      async () => {
+        Object.assign(installData, await initData())
+        loading.value = false
+        if (!['Pending', 'Processing'].includes(jobState.value)) {
+          pause()
+        }
+      },
+      JOB_SCHEDULE_INTERVAL,
+      { immediateCallback: true }
+    )
+  }
+
   const initData = async () => {
     const res = await getJob(serviceInfo.value.jobId, clusterId.value)
     jobs.value = [res] as any
@@ -76,11 +97,12 @@
 
   onBeforeMount(async () => {
     disableButton.value = true
+
     const { pause } = useIntervalFn(
       async () => {
         Object.assign(installData, await initData())
         loading.value = false
-        if (jobState.value !== 'Pending' && jobState.value !== 'Processing') {
+        if (!['Pending', 'Processing'].includes(jobState.value)) {
           pause()
         }
       },
@@ -105,6 +127,14 @@
 <template>
   <div class="container">
     <div class="title">{{ $t('common.install') }}</div>
+    <div class="retry">
+      <a-button type="link" size="small" :disabled="!canRetry" @click="doRetry">
+        <template #icon>
+          <redo-outlined />
+        </template>
+        <span class="retry-button">{{ $t('common.retry') }}</span>
+      </a-button>
+    </div>
     <a-table
       :pagination="false"
       :scroll="{ y: 400 }"
@@ -147,9 +177,17 @@
       font-size: 1.5rem;
       line-height: 2rem;
       margin-bottom: 1rem;
+    }
+
+    .retry {
+      display: flex;
+      flex-direction: row;
+      justify-content: end;
+      margin: 0 1rem 1rem 0;
+      width: 100%;
 
-      .progress {
-        width: 80%;
+      .retry-button {
+        margin-left: 3px;
       }
     }
   }
diff --git a/bigtop-manager-ui/src/locales/en_US/common.ts b/bigtop-manager-ui/src/locales/en_US/common.ts
index ad0a1fc..920312f 100644
--- a/bigtop-manager-ui/src/locales/en_US/common.ts
+++ b/bigtop-manager-ui/src/locales/en_US/common.ts
@@ -26,6 +26,7 @@ export default {
   status: 'Status',
   edit: 'Edit',
   submit: 'Submit',
+  retry: 'Retry',
   cancel: 'Cancel',
   confirm: 'Confirm',
   exit_confirm: 'Are you sure you want to exit?',
diff --git a/bigtop-manager-ui/src/locales/zh_CN/common.ts b/bigtop-manager-ui/src/locales/zh_CN/common.ts
index 281bb17..8de97e0 100644
--- a/bigtop-manager-ui/src/locales/zh_CN/common.ts
+++ b/bigtop-manager-ui/src/locales/zh_CN/common.ts
@@ -26,6 +26,7 @@ export default {
   status: '状态',
   edit: '编辑',
   submit: '提交',
+  retry: '重试',
   cancel: '取消',
   confirm: '确认',
   exit_confirm: '确定要退出吗?',

Reply via email to