This is an automated email from the ASF dual-hosted git repository.
zhongjiajie pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/dolphinscheduler.git
The following commit(s) were added to refs/heads/dev by this push:
new a1861bc2df Use DATAX_PYTHON to specify a datax python version (#13849)
a1861bc2df is described below
commit a1861bc2df9c462de03df9688bd6e673e695432d
Author: Wenjun Ruan <[email protected]>
AuthorDate: Mon Apr 3 08:47:32 2023 +0800
Use DATAX_PYTHON to specify a datax python version (#13849)
(cherry picked from commit 752452ecbd2f8094b819eae219f3eec0f6a6427b)
---
docs/docs/en/guide/task/datax.md | 2 ++
docs/docs/zh/guide/task/datax.md | 2 ++
.../plugin/task/datax/DataxTask.java | 28 ++++------------------
.../plugin/task/datax/DataxTaskTest.java | 10 --------
4 files changed, 8 insertions(+), 34 deletions(-)
diff --git a/docs/docs/en/guide/task/datax.md b/docs/docs/en/guide/task/datax.md
index 7f87c0eae6..feeeef5e05 100644
--- a/docs/docs/en/guide/task/datax.md
+++ b/docs/docs/en/guide/task/datax.md
@@ -4,6 +4,8 @@
DataX task type for executing DataX programs. For DataX nodes, the worker will
execute `${DATAX_HOME}/bin/datax.py` to analyze the input json file.
+By default, the datax.py will be executed by python2.7, if you want to use
other python version, you can set the `DATAX_PYTHON` environment variable to
specify a version.
+
## Create Task
- Click `Project Management -> Project Name -> Workflow Definition`, and click
the `Create Workflow` button to enter the DAG editing page.
diff --git a/docs/docs/zh/guide/task/datax.md b/docs/docs/zh/guide/task/datax.md
index cf50dc7e84..e738743707 100644
--- a/docs/docs/zh/guide/task/datax.md
+++ b/docs/docs/zh/guide/task/datax.md
@@ -4,6 +4,8 @@
DataX 任务类型,用于执行 DataX 程序。对于 DataX 节点,worker 会通过执行 `${DATAX_HOME}/bin/datax.py`
来解析传入的 json 文件。
+默认会使用python2.7去执行datax.py,如果需要使用其他版本的python去执行datax.py,需要在环境变量中配置`DATAX_PYTHON`。
+
## 创建任务
- 点击项目管理 -> 项目名称 -> 工作流定义,点击“创建工作流”按钮,进入 DAG 编辑页面;
diff --git
a/dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/main/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTask.java
b/dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/main/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTask.java
index 5e82bb71f1..df20228027 100644
---
a/dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/main/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTask.java
+++
b/dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/main/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTask.java
@@ -40,14 +40,12 @@ import org.apache.dolphinscheduler.spi.enums.Flag;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.SystemUtils;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermission;
@@ -60,10 +58,9 @@ import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutionException;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import com.alibaba.druid.sql.ast.SQLStatement;
import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr;
@@ -87,9 +84,9 @@ public class DataxTask extends AbstractTask {
public static final String CUSTOM_PARAM = " -D%s='%s'";
/**
* python process(datax only supports version 2.7 by default)
+ * todo: Create a shell script to execute the datax task, and read the
python version from the env, so we can support multiple versions of datax python
*/
- private static final String DATAX_PYTHON = "python2.7";
- private static final Pattern PYTHON_PATH_PATTERN =
Pattern.compile("/bin/python[\\d.]*$");
+ private static final String DATAX_PYTHON =
Optional.ofNullable(System.getenv("DATAX_PYTHON")).orElse("python2.7");
/**
* select all
@@ -399,7 +396,7 @@ public class DataxTask extends AbstractTask {
}
// datax python command
- String sbr = getPythonCommand() +
+ String sbr = DATAX_PYTHON +
" " +
DATAX_PATH +
" " +
@@ -441,23 +438,6 @@ public class DataxTask extends AbstractTask {
return customParameters;
}
- public String getPythonCommand() {
- String pythonHome = System.getenv("PYTHON_HOME");
- return getPythonCommand(pythonHome);
- }
-
- public String getPythonCommand(String pythonHome) {
- if (StringUtils.isEmpty(pythonHome)) {
- return DATAX_PYTHON;
- }
- String pythonBinPath = "/bin/" + DATAX_PYTHON;
- Matcher matcher = PYTHON_PATH_PATTERN.matcher(pythonHome);
- if (matcher.find()) {
- return matcher.replaceAll(pythonBinPath);
- }
- return Paths.get(pythonHome, pythonBinPath).toString();
- }
-
public String loadJvmEnv(DataxParameters dataXParameters) {
int xms = Math.max(dataXParameters.getXms(), 1);
int xmx = Math.max(dataXParameters.getXmx(), 1);
diff --git
a/dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/test/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTaskTest.java
b/dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/test/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTaskTest.java
index 7368b6208f..d5054e836d 100644
---
a/dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/test/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTaskTest.java
+++
b/dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/test/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTaskTest.java
@@ -245,16 +245,6 @@ public class DataxTaskTest {
}
}
- @Test
- public void testGetPythonCommand() {
- Assertions.assertEquals(dataxTask.getPythonCommand(""), "python2.7");
- Assertions.assertEquals(dataxTask.getPythonCommand("/bin/python"),
"/bin/python2.7");
-
- String pythonCommand = dataxTask.getPythonCommand("/opt/python");
- pythonCommand = pythonCommand.replace(File.separator, "/");
- Assertions.assertEquals(pythonCommand, "/opt/python/bin/python2.7");
- }
-
@Test
public void testLoadJvmEnv() {
DataxParameters dataXParameters = createDataxParameters();