This is an automated email from the ASF dual-hosted git repository.
wuzhiguo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/bigtop-manager.git
The following commit(s) were added to refs/heads/main by this push:
new ae8e43d BIGTOP-4158: Support service add job retry when failed (#17)
ae8e43d is described below
commit ae8e43d4a37c11068f77c28cba38050bacae2fd8
Author: Zhiguo Wu <[email protected]>
AuthorDate: Mon Jul 15 10:40:58 2024 +0800
BIGTOP-4158: Support service add job retry when failed (#17)
---
.../agent/service/CommandServiceGrpcImpl.java | 20 +++++++++
.../agent/service/TaskLogServiceGrpcImpl.java | 22 +---------
.../bigtop/manager/agent/utils/LogFileUtils.java | 25 ++++++++---
.../manager/server/controller/JobController.java | 7 ++++
.../manager/server/enums/ApiExceptionEnum.java | 1 +
.../bigtop/manager/server/enums/LocaleKeys.java | 1 +
.../bigtop/manager/server/service/JobService.java | 2 +
.../server/service/impl/JobServiceImpl.java | 41 ++++++++++++++++++
.../main/resources/i18n/messages_en_US.properties | 1 +
.../main/resources/i18n/messages_zh_CN.properties | 1 +
bigtop-manager-ui/src/api/job/index.ts | 7 ++++
.../src/components/service-add/install.vue | 48 +++++++++++++++++++---
bigtop-manager-ui/src/locales/en_US/common.ts | 1 +
bigtop-manager-ui/src/locales/zh_CN/common.ts | 1 +
14 files changed, 147 insertions(+), 31 deletions(-)
diff --git
a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/CommandServiceGrpcImpl.java
b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/CommandServiceGrpcImpl.java
index ed1f829..c9d9368 100644
---
a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/CommandServiceGrpcImpl.java
+++
b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/CommandServiceGrpcImpl.java
@@ -21,6 +21,7 @@ package org.apache.bigtop.manager.agent.service;
import org.apache.bigtop.manager.agent.cache.Caches;
import org.apache.bigtop.manager.agent.executor.CommandExecutor;
import org.apache.bigtop.manager.agent.executor.CommandExecutors;
+import org.apache.bigtop.manager.agent.utils.LogFileUtils;
import org.apache.bigtop.manager.grpc.generated.CommandReply;
import org.apache.bigtop.manager.grpc.generated.CommandRequest;
import org.apache.bigtop.manager.grpc.generated.CommandServiceGrpc;
@@ -32,6 +33,10 @@ import io.grpc.stub.StreamObserver;
import lombok.extern.slf4j.Slf4j;
import net.devh.boot.grpc.server.service.GrpcService;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
@Slf4j
@GrpcService
public class CommandServiceGrpcImpl extends
CommandServiceGrpc.CommandServiceImplBase {
@@ -39,6 +44,9 @@ public class CommandServiceGrpcImpl extends
CommandServiceGrpc.CommandServiceImp
@Override
public void exec(CommandRequest request, StreamObserver<CommandReply>
responseObserver) {
try {
+ // Truncate the old log file if it exists; this only matters when the command is a retry
+ truncateLogFile(request.getTaskId());
+
MDC.put("taskId", String.valueOf(request.getTaskId()));
Caches.RUNNING_TASKS.add(request.getTaskId());
CommandExecutor commandExecutor =
CommandExecutors.getCommandExecutor(request.getType());
@@ -54,4 +62,16 @@ public class CommandServiceGrpcImpl extends
CommandServiceGrpc.CommandServiceImp
MDC.clear();
}
}
+
+    /**
+     * Empties the log file of the given task if that file already exists.
+     *
+     * <p>A failure while truncating is logged as a warning and otherwise ignored.
+     *
+     * @param taskId id of the task; its log file path is resolved through
+     *               {@code LogFileUtils.getLogFilePath(taskId)}
+     */
+    private void truncateLogFile(Long taskId) {
+        String logPath = LogFileUtils.getLogFilePath(taskId);
+        File logFile = new File(logPath);
+        if (!logFile.exists()) {
+            // No previous log, nothing to clear
+            return;
+        }
+
+        try (RandomAccessFile accessor = new RandomAccessFile(logFile, "rw")) {
+            // A length of zero discards whatever the file held before
+            accessor.setLength(0);
+        } catch (IOException e) {
+            log.warn("Error when truncate file: {}", logPath, e);
+        }
+    }
}
diff --git
a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/TaskLogServiceGrpcImpl.java
b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/TaskLogServiceGrpcImpl.java
index 018ad0a..781cc3f 100644
---
a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/TaskLogServiceGrpcImpl.java
+++
b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/service/TaskLogServiceGrpcImpl.java
@@ -19,18 +19,16 @@
package org.apache.bigtop.manager.agent.service;
import org.apache.bigtop.manager.agent.cache.Caches;
+import org.apache.bigtop.manager.agent.utils.LogFileUtils;
import org.apache.bigtop.manager.grpc.generated.TaskLogReply;
import org.apache.bigtop.manager.grpc.generated.TaskLogRequest;
import org.apache.bigtop.manager.grpc.generated.TaskLogServiceGrpc;
-import org.apache.commons.lang3.SystemUtils;
-
import io.grpc.Status;
import io.grpc.stub.StreamObserver;
import lombok.extern.slf4j.Slf4j;
import net.devh.boot.grpc.server.service.GrpcService;
-import java.io.File;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
@@ -40,7 +38,7 @@ public class TaskLogServiceGrpcImpl extends
TaskLogServiceGrpc.TaskLogServiceImp
@Override
public void getLog(TaskLogRequest request, StreamObserver<TaskLogReply>
responseObserver) {
- String path = getLogFilePath(request.getTaskId());
+ String path = LogFileUtils.getLogFilePath(request.getTaskId());
try (RandomAccessFile file = new RandomAccessFile(path, "r")) {
// Read from beginning
long fileLength = file.length();
@@ -86,20 +84,4 @@ public class TaskLogServiceGrpcImpl extends
TaskLogServiceGrpc.TaskLogServiceImp
}
}
}
-
- private String getLogFilePath(Long taskId) {
- String baseDir;
- if (SystemUtils.IS_OS_WINDOWS) {
- baseDir = SystemUtils.getUserDir().getPath();
- } else {
- File file = new File(this.getClass()
- .getProtectionDomain()
- .getCodeSource()
- .getLocation()
- .getPath());
- baseDir = file.getParentFile().getParentFile().getPath();
- }
-
- return baseDir + File.separator + "tasklogs" + File.separator +
"task-" + taskId + ".log";
- }
}
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/utils/LogFileUtils.java
similarity index 53%
copy from
bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
copy to
bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/utils/LogFileUtils.java
index 117292f..e87929a 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
+++
b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/utils/LogFileUtils.java
@@ -16,14 +16,27 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.bigtop.manager.server.service;
+package org.apache.bigtop.manager.agent.utils;
-import org.apache.bigtop.manager.server.model.vo.JobVO;
-import org.apache.bigtop.manager.server.model.vo.PageVO;
+import org.apache.commons.lang3.SystemUtils;
-public interface JobService {
+import java.io.File;
- PageVO<JobVO> list(Long clusterId);
+public class LogFileUtils {
- JobVO get(Long id);
+    /**
+     * Builds the path of the log file that belongs to a task.
+     *
+     * <p>On Windows the base directory is the one returned by {@code SystemUtils.getUserDir()};
+     * on other systems it is two directory levels above the code source location of this class.
+     * The result is {@code <baseDir>/tasklogs/task-<taskId>.log}, joined with the platform
+     * file separator.
+     *
+     * @param taskId id of the task
+     * @return path of the task's log file
+     */
+    public static String getLogFilePath(Long taskId) {
+        File baseDir;
+        if (SystemUtils.IS_OS_WINDOWS) {
+            baseDir = SystemUtils.getUserDir();
+        } else {
+            String location = LogFileUtils.class
+                    .getProtectionDomain()
+                    .getCodeSource()
+                    .getLocation()
+                    .getPath();
+            baseDir = new File(location).getParentFile().getParentFile();
+        }
+
+        String fileName = "task-" + taskId + ".log";
+        return baseDir.getPath() + File.separator + "tasklogs" + File.separator + fileName;
+    }
}
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/controller/JobController.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/controller/JobController.java
index 940afd3..97bcc09 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/controller/JobController.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/controller/JobController.java
@@ -25,6 +25,7 @@ import org.apache.bigtop.manager.server.utils.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
@@ -66,4 +67,10 @@ public class JobController {
public ResponseEntity<JobVO> get(@PathVariable Long id, @PathVariable Long
clusterId) {
return ResponseEntity.success(jobService.get(id));
}
+
+ @Operation(summary = "retry", description = "Retry a failed job")
+ @PostMapping("/{id}/retry")
+ public ResponseEntity<JobVO> retry(@PathVariable Long id, @PathVariable
Long clusterId) {
+ return ResponseEntity.success(jobService.retry(id));
+ }
}
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/ApiExceptionEnum.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/ApiExceptionEnum.java
index 97c8766..45ea423 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/ApiExceptionEnum.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/ApiExceptionEnum.java
@@ -52,6 +52,7 @@ public enum ApiExceptionEnum {
// Job Exceptions -- 16000 ~ 16999
JOB_NOT_FOUND(16000, LocaleKeys.JOB_NOT_FOUND),
+ JOB_NOT_RETRYABLE(16001, LocaleKeys.JOB_NOT_RETRYABLE),
// Configuration Exceptions -- 17000 ~ 17999
CONFIG_NOT_FOUND(17000, LocaleKeys.CONFIG_NOT_FOUND),
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/LocaleKeys.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/LocaleKeys.java
index 2a9ad0f..22dc64a 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/LocaleKeys.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/enums/LocaleKeys.java
@@ -50,6 +50,7 @@ public enum LocaleKeys {
COMPONENT_NOT_FOUND("component.not.found"),
JOB_NOT_FOUND("job.not.found"),
+ JOB_NOT_RETRYABLE("job.not.retryable"),
CONFIG_NOT_FOUND("config.not.found"),
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
index 117292f..5d0e1d8 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/JobService.java
@@ -26,4 +26,6 @@ public interface JobService {
PageVO<JobVO> list(Long clusterId);
JobVO get(Long id);
+
+ JobVO retry(Long id);
}
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/impl/JobServiceImpl.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/impl/JobServiceImpl.java
index b4391f7..77e191a 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/impl/JobServiceImpl.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/service/impl/JobServiceImpl.java
@@ -18,8 +18,16 @@
*/
package org.apache.bigtop.manager.server.service.impl;
+import org.apache.bigtop.manager.common.enums.JobState;
import org.apache.bigtop.manager.dao.entity.Job;
+import org.apache.bigtop.manager.dao.entity.Stage;
+import org.apache.bigtop.manager.dao.entity.Task;
import org.apache.bigtop.manager.dao.repository.JobRepository;
+import org.apache.bigtop.manager.dao.repository.StageRepository;
+import org.apache.bigtop.manager.dao.repository.TaskRepository;
+import org.apache.bigtop.manager.server.command.scheduler.JobScheduler;
+import org.apache.bigtop.manager.server.enums.ApiExceptionEnum;
+import org.apache.bigtop.manager.server.exception.ApiException;
import org.apache.bigtop.manager.server.model.mapper.JobMapper;
import org.apache.bigtop.manager.server.model.query.PageQuery;
import org.apache.bigtop.manager.server.model.vo.JobVO;
@@ -41,6 +49,15 @@ public class JobServiceImpl implements JobService {
@Resource
private JobRepository jobRepository;
+ @Resource
+ private StageRepository stageRepository;
+
+ @Resource
+ private TaskRepository taskRepository;
+
+ @Resource
+ private JobScheduler jobScheduler;
+
@Override
public PageVO<JobVO> list(Long clusterId) {
PageQuery pageQuery = PageUtils.getPageQuery();
@@ -60,4 +77,28 @@ public class JobServiceImpl implements JobService {
Job job = jobRepository.getReferenceById(id);
return JobMapper.INSTANCE.fromEntity2VO(job);
}
+
+    /**
+     * Resets a failed job to pending and submits it to the scheduler again.
+     *
+     * <p>Every task and stage of the job is set back to {@code JobState.PENDING} and saved,
+     * then the job itself is reset, saved and handed to {@code jobScheduler.submit}.
+     *
+     * @param id id of the job to retry
+     * @return the job, mapped to its VO, after it has been resubmitted
+     * @throws ApiException with {@code JOB_NOT_FOUND} if no job has the given id, or with
+     *                      {@code JOB_NOT_RETRYABLE} if the job is not in the failed state
+     */
+    @Override
+    public JobVO retry(Long id) {
+        // findById gives an explicit "not found" result; getReferenceById only returns a lazy
+        // reference, so an unknown id would fail later with a persistence exception instead of
+        // the API's JOB_NOT_FOUND error.
+        Job job = jobRepository.findById(id).orElseThrow(() -> new ApiException(ApiExceptionEnum.JOB_NOT_FOUND));
+        if (job.getState() != JobState.FAILED) {
+            throw new ApiException(ApiExceptionEnum.JOB_NOT_RETRYABLE);
+        }
+
+        for (Stage stage : job.getStages()) {
+            for (Task task : stage.getTasks()) {
+                task.setState(JobState.PENDING);
+                taskRepository.save(task);
+            }
+
+            stage.setState(JobState.PENDING);
+            stageRepository.save(stage);
+        }
+
+        // The job is saved as pending before it is submitted
+        job.setState(JobState.PENDING);
+        jobRepository.save(job);
+        jobScheduler.submit(job);
+
+        return JobMapper.INSTANCE.fromEntity2VO(job);
+    }
}
diff --git
a/bigtop-manager-server/src/main/resources/i18n/messages_en_US.properties
b/bigtop-manager-server/src/main/resources/i18n/messages_en_US.properties
index 6560c05..5ae40e7 100644
--- a/bigtop-manager-server/src/main/resources/i18n/messages_en_US.properties
+++ b/bigtop-manager-server/src/main/resources/i18n/messages_en_US.properties
@@ -44,6 +44,7 @@ service.required.not.found=Required Service [{0}] not exist
component.not.found=Component not exist
job.not.found=Job not exist
+job.not.retryable=Job is not retryable when it's not failed
config.not.found=Config not exist
diff --git
a/bigtop-manager-server/src/main/resources/i18n/messages_zh_CN.properties
b/bigtop-manager-server/src/main/resources/i18n/messages_zh_CN.properties
index 60d9ed7..a45ff71 100644
--- a/bigtop-manager-server/src/main/resources/i18n/messages_zh_CN.properties
+++ b/bigtop-manager-server/src/main/resources/i18n/messages_zh_CN.properties
@@ -44,6 +44,7 @@ service.required.not.found=依赖服务 [{0}] 不存在
component.not.found=组件不存在
job.not.found=任务不存在
+job.not.retryable=任务非失败状态,无法重试
config.not.found=配置不存在
diff --git a/bigtop-manager-ui/src/api/job/index.ts
b/bigtop-manager-ui/src/api/job/index.ts
index 869d7ed..d32ceb3 100644
--- a/bigtop-manager-ui/src/api/job/index.ts
+++ b/bigtop-manager-ui/src/api/job/index.ts
@@ -27,6 +27,13 @@ export const getJob = (id: number, clusterId: number):
Promise<JobVO> => {
})
}
+// Sends a POST request to the retry endpoint of the given job within the given cluster.
+export const retryJob = (id: number, clusterId: number): Promise<JobVO> => {
+  const url = '/clusters/' + clusterId + '/jobs/' + id + '/retry'
+  return request({ method: 'post', url })
+}
+
export const getJobs = (
clusterId: number,
pagination: Pagination
diff --git a/bigtop-manager-ui/src/components/service-add/install.vue
b/bigtop-manager-ui/src/components/service-add/install.vue
index 99c9849..cfc7cf6 100644
--- a/bigtop-manager-ui/src/components/service-add/install.vue
+++ b/bigtop-manager-ui/src/components/service-add/install.vue
@@ -19,15 +19,16 @@
<script setup lang="ts">
import { useI18n } from 'vue-i18n'
- import { getJob } from '@/api/job'
+ import { getJob, retryJob } from '@/api/job'
import { JOB_SCHEDULE_INTERVAL } from '@/utils/constant.ts'
import { useIntervalFn } from '@vueuse/core'
- import { onBeforeMount, onBeforeUnmount, reactive, ref } from 'vue'
+ import { computed, onBeforeMount, onBeforeUnmount, reactive, ref } from 'vue'
import { useClusterStore } from '@/store/cluster'
import { storeToRefs } from 'pinia'
import { JobVO, StageVO } from '@/api/job/types'
import CustomProgress from '@/components/job-info/custom-progress.vue'
import Job from '@/components/job-info/job.vue'
+ import { RedoOutlined } from '@ant-design/icons-vue'
const serviceInfo = defineModel<any>('serviceInfo')
const disableButton = defineModel<boolean>('disableButton')
@@ -43,6 +44,10 @@
const currStage = ref<StageVO>()
const installData = reactive([])
+  // True only while the job state is 'Failed'; the retry button is disabled otherwise
+  const canRetry = computed(() => jobState.value === 'Failed')
+
const installColumns = [
{
title: t('common.stage'),
@@ -56,6 +61,22 @@
}
]
+  // Poller used after a retry. It is created once, synchronously during setup, so that it
+  // belongs to the component's scope and is stopped together with the component. Creating it
+  // inside doRetry after the await would place it outside that scope and would also start an
+  // additional timer on every retry. `immediate: false` keeps it idle until doRetry resumes it.
+  const { resume: startRetryPolling, pause: stopRetryPolling } = useIntervalFn(
+    async () => {
+      Object.assign(installData, await initData())
+      loading.value = false
+      // Stop polling once the job has left the in-progress states
+      if (!['Pending', 'Processing'].includes(jobState.value)) {
+        stopRetryPolling()
+      }
+    },
+    JOB_SCHEDULE_INTERVAL,
+    { immediate: false, immediateCallback: true }
+  )
+
+  // Asks the server to retry the job, then starts polling; because of `immediateCallback`
+  // the first refresh runs as soon as polling is resumed.
+  const doRetry = async () => {
+    await retryJob(serviceInfo.value.jobId, clusterId.value)
+    startRetryPolling()
+  }
+
const initData = async () => {
const res = await getJob(serviceInfo.value.jobId, clusterId.value)
jobs.value = [res] as any
@@ -76,11 +97,12 @@
onBeforeMount(async () => {
disableButton.value = true
+
const { pause } = useIntervalFn(
async () => {
Object.assign(installData, await initData())
loading.value = false
- if (jobState.value !== 'Pending' && jobState.value !== 'Processing') {
+ if (!['Pending', 'Processing'].includes(jobState.value)) {
pause()
}
},
@@ -105,6 +127,14 @@
<template>
<div class="container">
<div class="title">{{ $t('common.install') }}</div>
+ <div class="retry">
+ <a-button type="link" size="small" :disabled="!canRetry"
@click="doRetry">
+ <template #icon>
+ <redo-outlined />
+ </template>
+ <span class="retry-button">{{ $t('common.retry') }}</span>
+ </a-button>
+ </div>
<a-table
:pagination="false"
:scroll="{ y: 400 }"
@@ -147,9 +177,17 @@
font-size: 1.5rem;
line-height: 2rem;
margin-bottom: 1rem;
+ }
+
+ .retry {
+ display: flex;
+ flex-direction: row;
+ justify-content: end;
+ margin: 0 1rem 1rem 0;
+ width: 100%;
- .progress {
- width: 80%;
+ .retry-button {
+ margin-left: 3px;
}
}
}
diff --git a/bigtop-manager-ui/src/locales/en_US/common.ts
b/bigtop-manager-ui/src/locales/en_US/common.ts
index ad0a1fc..920312f 100644
--- a/bigtop-manager-ui/src/locales/en_US/common.ts
+++ b/bigtop-manager-ui/src/locales/en_US/common.ts
@@ -26,6 +26,7 @@ export default {
status: 'Status',
edit: 'Edit',
submit: 'Submit',
+ retry: 'Retry',
cancel: 'Cancel',
confirm: 'Confirm',
exit_confirm: 'Are you sure you want to exit?',
diff --git a/bigtop-manager-ui/src/locales/zh_CN/common.ts
b/bigtop-manager-ui/src/locales/zh_CN/common.ts
index 281bb17..8de97e0 100644
--- a/bigtop-manager-ui/src/locales/zh_CN/common.ts
+++ b/bigtop-manager-ui/src/locales/zh_CN/common.ts
@@ -26,6 +26,7 @@ export default {
status: '状态',
edit: '编辑',
submit: '提交',
+ retry: '重试',
cancel: '取消',
confirm: '确认',
exit_confirm: '确定要退出吗?',