This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch asf-site
in repository
https://gitbox.apache.org/repos/asf/incubator-dolphinscheduler-website.git
The following commit(s) were added to refs/heads/asf-site by this push:
new 1e4b61c Automated deployment: Wed Feb 26 08:12:40 UTC 2020
e54c58efa973fea3820cc2cbe005e922e5dc8037
1e4b61c is described below
commit 1e4b61c728dcd9fdf2a0a10f11e41c63ed734d43
Author: lgcareer <[email protected]>
AuthorDate: Wed Feb 26 08:12:40 2020 +0000
Automated deployment: Wed Feb 26 08:12:40 UTC 2020
e54c58efa973fea3820cc2cbe005e922e5dc8037
---
build/blog.js | 2 +-
build/blogDetail.js | 2 +-
build/community.js | 2 +-
build/documentation.js | 2 +-
build/home.js | 2 +-
en-us/docs/1.2.1/user_doc/architecture-design.html | 304 ++++++++
en-us/docs/1.2.1/user_doc/architecture-design.json | 6 +
en-us/docs/1.2.1/user_doc/metadata-1.2.html | 659 +++++++++++++++++
en-us/docs/1.2.1/user_doc/metadata-1.2.json | 6 +
en-us/docs/1.2.1/user_doc/plugin-development.html | 81 +++
en-us/docs/1.2.1/user_doc/plugin-development.json | 6 +
en-us/docs/1.2.1/user_doc/quick-start.html | 101 +++
en-us/docs/1.2.1/user_doc/quick-start.json | 6 +
en-us/docs/1.2.1/user_doc/system-manual.html | 776 +++++++++++++++++++++
en-us/docs/1.2.1/user_doc/system-manual.json | 6 +
en-us/docs/1.2.1/user_doc/upgrade.html | 65 ++
en-us/docs/1.2.1/user_doc/upgrade.json | 6 +
17 files changed, 2027 insertions(+), 5 deletions(-)
diff --git a/build/blog.js b/build/blog.js
index c445723..859117e 100644
--- a/build/blog.js
+++ b/build/blog.js
@@ -8,7 +8,7 @@ object-assign
(c) Sindre Sorhus
@license MIT
*/
-var
o=Object.getOwnPropertySymbols,a=Object.prototype.hasOwnProperty,i=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
+var
o=Object.getOwnPropertySymbols,a=Object.prototype.hasOwnProperty,i=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
* react-is.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
diff --git a/build/blogDetail.js b/build/blogDetail.js
index 74dd410..1676456 100644
--- a/build/blogDetail.js
+++ b/build/blogDetail.js
@@ -8,7 +8,7 @@ object-assign
(c) Sindre Sorhus
@license MIT
*/
-var
o=Object.getOwnPropertySymbols,i=Object.prototype.hasOwnProperty,a=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
+var
o=Object.getOwnPropertySymbols,i=Object.prototype.hasOwnProperty,a=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
* react-is.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
diff --git a/build/community.js b/build/community.js
index bb601cc..e2e04b3 100644
--- a/build/community.js
+++ b/build/community.js
@@ -8,7 +8,7 @@ object-assign
(c) Sindre Sorhus
@license MIT
*/
-var
o=Object.getOwnPropertySymbols,i=Object.prototype.hasOwnProperty,a=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
+var
o=Object.getOwnPropertySymbols,i=Object.prototype.hasOwnProperty,a=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
* react-is.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
diff --git a/build/documentation.js b/build/documentation.js
index bd6972a..f0335ef 100644
--- a/build/documentation.js
+++ b/build/documentation.js
@@ -8,7 +8,7 @@ object-assign
(c) Sindre Sorhus
@license MIT
*/
-var
o=Object.getOwnPropertySymbols,i=Object.prototype.hasOwnProperty,a=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
+var
o=Object.getOwnPropertySymbols,i=Object.prototype.hasOwnProperty,a=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
* react-is.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
diff --git a/build/home.js b/build/home.js
index 46c5abd..765d363 100644
--- a/build/home.js
+++ b/build/home.js
@@ -8,7 +8,7 @@ object-assign
(c) Sindre Sorhus
@license MIT
*/
-var
o=Object.getOwnPropertySymbols,a=Object.prototype.hasOwnProperty,i=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
+var
o=Object.getOwnPropertySymbols,a=Object.prototype.hasOwnProperty,i=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var
e=new
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var
t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map(function(e){return
t[e]}).join(""))return!1;var
r={};return"abcdefghijklmnopqrst".split("").forEach(function(e){r[e]=e}),"abcdefghijklmn
[...]
* react-is.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
diff --git a/en-us/docs/1.2.1/user_doc/architecture-design.html
b/en-us/docs/1.2.1/user_doc/architecture-design.html
new file mode 100644
index 0000000..b96478d
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/architecture-design.html
@@ -0,0 +1,304 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0,
maximum-scale=1.0, user-scalable=no">
+ <meta name="keywords" content="architecture-design" />
+ <meta name="description" content="architecture-design" />
+ <!-- 网页标签标题 -->
+ <title>architecture-design</title>
+ <link rel="shortcut icon" href="/img/docsite.ico"/>
+ <link rel="stylesheet" href="/build/documentation.css" />
+</head>
+<body>
+ <div id="root"><div class="documentation-page"
data-reactroot=""><header class="header-container header-container-normal"><div
class="header-body"><a href="/en-us/index.html"><img class="logo"
src="/img/hlogo_colorful.svg"/></a><div class="search search-normal"><span
class="icon-search"></span></div><span class="language-switch
language-switch-normal">中</span><div class="header-menu"><img
class="header-menu-toggle" src="/img/system/menu_gray.png"/><div><ul
class="ant-menu blackClass ant [...]
+<p>Before explaining the architecture of the schedule system, let us first
understand the common nouns of the schedule system.</p>
+<h3>1.Noun Interpretation</h3>
+<p><strong>DAG:</strong> Full name Directed Acyclic Graph,referred to as
DAG。Tasks in the workflow are assembled in the form of directed acyclic graphs,
which are topologically traversed from nodes with zero indegrees of ingress
until there are no successor nodes. For example, the following picture:</p>
+<p align="center">
+ <img src="/img/dag_examples_cn.jpg" alt="dag示例" width="60%" />
+ <p align="center">
+ <em>dag example</em>
+ </p>
+</p>
+<p><strong>Process definition</strong>: Visualization <strong>DAG</strong> by
dragging task nodes and establishing associations of task nodes</p>
+<p><strong>Process instance</strong>: A process instance is an instantiation
of a process definition, which can be generated by manual startup or
scheduling. The process definition runs once, a new process instance is
generated</p>
+<p><strong>Task instance</strong>: A task instance is the instantiation of a
specific task node when a process instance runs, which indicates the specific
task execution status</p>
+<p><strong>Task type</strong>: Currently supports SHELL, SQL, SUB_PROCESS
(sub-process), PROCEDURE, MR, SPARK, PYTHON, DEPENDENT (dependency), and plans
to support dynamic plug-in extension, note: the
sub-<strong>SUB_PROCESS</strong> is also A separate process definition that can
be launched separately</p>
+<p><strong>Schedule mode</strong> : The system supports timing schedule and
manual schedule based on cron expressions. Command type support: start
workflow, start execution from current node, resume fault-tolerant workflow,
resume pause process, start execution from failed node, complement, timer,
rerun, pause, stop, resume waiting thread. Where <strong>recovers the
fault-tolerant workflow</strong> and <strong>restores the waiting
thread</strong> The two command types are used by the sc [...]
+<p><strong>Timed schedule</strong>: The system uses <strong>quartz</strong>
distributed scheduler and supports the generation of cron expression
visualization</p>
+<p><strong>Dependency</strong>: The system does not only support
<strong>DAG</strong> Simple dependencies between predecessors and successor
nodes, but also provides <strong>task dependencies</strong> nodes, support for
<strong>custom task dependencies between processes</strong></p>
+<p><strong>Priority</strong>: Supports the priority of process instances and
task instances. If the process instance and task instance priority are not set,
the default is first in, first out.</p>
+<p><strong>Mail Alert</strong>: Support <strong>SQL Task</strong> Query Result
Email Send, Process Instance Run Result Email Alert and Fault Tolerant Alert
Notification</p>
+<p><strong>Failure policy</strong>: For tasks running in parallel, if there
are tasks that fail, two failure policy processing methods are provided.
<strong>Continue</strong> means that the status of the task is run in parallel
until the end of the process failure. <strong>End</strong> means that once a
failed task is found, Kill also drops the running parallel task and the process
ends.</p>
+<p><strong>Complement</strong>: Complement historical data, support
<strong>interval parallel and serial</strong> two complement methods</p>
+<h3>2.System architecture</h3>
+<h4>2.1 System Architecture Diagram</h4>
+<p align="center">
+ <img src="/img/architecture.jpg" alt="System Architecture Diagram" />
+ <p align="center">
+ <em>System Architecture Diagram</em>
+ </p>
+</p>
+<h4>2.2 Architectural description</h4>
+<ul>
+<li>
+<p><strong>MasterServer</strong></p>
+<p>MasterServer adopts the distributed non-central design concept.
MasterServer is mainly responsible for DAG task split, task submission
monitoring, and monitoring the health status of other MasterServer and
WorkerServer.
+When the MasterServer service starts, it registers a temporary node with
Zookeeper, and listens to the Zookeeper temporary node state change for fault
tolerance processing.</p>
+<h5>The service mainly contains:</h5>
+<ul>
+<li>
+<p><strong>Distributed Quartz</strong> distributed scheduling component,
mainly responsible for the start and stop operation of the scheduled task. When
the quartz picks up the task, the master internally has a thread pool to be
responsible for the subsequent operations of the task.</p>
+</li>
+<li>
+<p><strong>MasterSchedulerThread</strong> is a scan thread that periodically
scans the <strong>command</strong> table in the database for different business
operations based on different <strong>command types</strong></p>
+</li>
+<li>
+<p><strong>MasterExecThread</strong> is mainly responsible for DAG task
segmentation, task submission monitoring, logic processing of various command
types</p>
+</li>
+<li>
+<p><strong>MasterTaskExecThread</strong> is mainly responsible for task
persistence</p>
+</li>
+</ul>
+</li>
+<li>
+<p><strong>WorkerServer</strong></p>
+<ul>
+<li>
+<p>WorkerServer also adopts a distributed, non-central design concept.
WorkerServer is mainly responsible for task execution and providing log
services. When the WorkerServer service starts, it registers the temporary node
with Zookeeper and maintains the heartbeat.</p>
+<h5>This service contains:</h5>
+<ul>
+<li><strong>FetchTaskThread</strong> is mainly responsible for continuously
receiving tasks from <strong>Task Queue</strong> and calling
<strong>TaskScheduleThread</strong> corresponding executors according to
different task types.</li>
+<li><strong>LoggerServer</strong> is an RPC service that provides functions
such as log fragment viewing, refresh and download.</li>
+</ul>
+</li>
+<li>
+<p><strong>ZooKeeper</strong></p>
+<p>The ZooKeeper service, the MasterServer and the WorkerServer nodes in the
system all use the ZooKeeper for cluster management and fault tolerance. In
addition, the system also performs event monitoring and distributed locking
based on ZooKeeper.
+We have also implemented queues based on Redis, but we hope that
DolphinScheduler relies on as few components as possible, so we finally removed
the Redis implementation.</p>
+</li>
+<li>
+<p><strong>Task Queue</strong></p>
+<p>The task queue operation is provided. Currently, the queue is also
implemented based on Zookeeper. Since there is less information stored in the
queue, there is no need to worry about too much data in the queue. In fact, we
have over-measured a million-level data storage queue, which has no effect on
system stability and performance.</p>
+</li>
+<li>
+<p><strong>Alert</strong></p>
+<p>Provides alarm-related interfaces. The interfaces mainly include
<strong>Alarms</strong>. The storage, query, and notification functions of the
two types of alarm data. The notification function has two types: <strong>mail
notification</strong> and <strong>SNMP (not yet implemented)</strong>.</p>
+</li>
+<li>
+<p><strong>API</strong></p>
+<p>The API interface layer is mainly responsible for processing requests from
the front-end UI layer. The service provides a RESTful api to provide request
services externally.
+Interfaces include workflow creation, definition, query, modification,
release, offline, manual start, stop, pause, resume, start execution from this
node, and more.</p>
+</li>
+<li>
+<p><strong>UI</strong></p>
+<p>The front-end page of the system provides various visual operation
interfaces of the system. For details, see the <a
href="/en-us/docs/user_doc/system-manual.html" target="_self">System User
Manual</a> section.</p>
+</li>
+</ul>
+</li>
+</ul>
+<h4>2.3 Architectural Design Ideas</h4>
+<h5>I. Decentralized vs centralization</h5>
+<h6>Centralization Thought</h6>
+<p>The centralized design concept is relatively simple. The nodes in the
distributed cluster are divided into two roles according to their roles:</p>
+<p align="center">
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/master_slave.png"
alt="master-slave role" width="50%" />
+ </p>
+<ul>
+<li>The role of Master is mainly responsible for task distribution and
supervising the health status of Slave. It can dynamically balance the task to
Slave, so that the Slave node will not be "busy" or
"free".</li>
+<li>The role of the Worker is mainly responsible for the execution of the task
and maintains the heartbeat with the Master so that the Master can assign tasks
to the Slave.</li>
+</ul>
+<p>Problems in the design of centralized :</p>
+<ul>
+<li>Once the Master has a problem, the group has no leader and the entire
cluster will crash. In order to solve this problem, most Master/Slave
architecture modes adopt the design scheme of the master and backup masters,
which can be hot standby or cold standby, automatic switching or manual
switching, and more and more new systems are available. Automatically elects
the ability to switch masters to improve system availability.</li>
+<li>Another problem is that if the Scheduler is on the Master, although it can
support different tasks in one DAG running on different machines, it will
generate overload of the Master. If the Scheduler is on the Slave, all tasks in
a DAG can only be submitted on one machine. If there are more parallel tasks,
the pressure on the Slave may be larger.</li>
+</ul>
+<h6>Decentralization</h6>
+ <p align="center"
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/decentralization.png"
alt="decentralized" width="50%" />
+ </p>
+<ul>
+<li>
+<p>In the decentralized design, there is usually no Master/Slave concept, all
roles are the same, the status is equal, the global Internet is a typical
decentralized distributed system, networked arbitrary node equipment down
machine , all will only affect a small range of features.</p>
+</li>
+<li>
+<p>The core design of decentralized design is that there is no
"manager" that is different from other nodes in the entire
distributed system, so there is no single point of failure problem. However,
since there is no "manager" node, each node needs to communicate with
other nodes to get the necessary machine information, and the unreliable line
of distributed system communication greatly increases the difficulty of
implementing the above functions.</p>
+</li>
+<li>
+<p>In fact, truly decentralized distributed systems are rare. Instead, dynamic
centralized distributed systems are constantly emerging. Under this
architecture, the managers in the cluster are dynamically selected, rather than
preset, and when the cluster fails, the nodes of the cluster will spontaneously
hold "meetings" to elect new "managers". Go to preside over
the work. The most typical case is the Etcd implemented in ZooKeeper and Go.</p>
+</li>
+<li>
+<p>Decentralization of DolphinScheduler is the registration of Master/Worker
to ZooKeeper. The Master Cluster and the Worker Cluster are not centered, and
the Zookeeper distributed lock is used to elect one Master or Worker as the
“manager” to perform the task.</p>
+</li>
+</ul>
+<h5>二、Distributed lock practice</h5>
+<p>DolphinScheduler uses ZooKeeper distributed locks to implement only one
Master to execute the Scheduler at the same time, or only one Worker to perform
task submission.</p>
+<ol>
+<li>The core process algorithm for obtaining distributed locks is as
follows</li>
+</ol>
+ <p align="center">
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/distributed_lock.png"
alt="Get Distributed Lock Process" width="50%" />
+ </p>
+<ol start="2">
+<li>Scheduler thread distributed lock implementation flow chart in
DolphinScheduler:</li>
+</ol>
+ <p align="center">
+ <img src="/img/distributed_lock_procss.png" alt="Get Distributed Lock
Process" width="50%" />
+ </p>
+<h5>Third, the thread is insufficient loop waiting problem</h5>
+<ul>
+<li>If there is no subprocess in a DAG, if the number of data in the Command
is greater than the threshold set by the thread pool, the direct process waits
or fails.</li>
+<li>If a large number of sub-processes are nested in a large DAG, the
following figure will result in a "dead" state:</li>
+</ul>
+ <p align="center">
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/lack_thread.png"
alt="Thread is not enough to wait for loop" width="50%" />
+ </p>
+<p>In the above figure, MainFlowThread waits for SubFlowThread1 to end,
SubFlowThread1 waits for SubFlowThread2 to end, SubFlowThread2 waits for
SubFlowThread3 to end, and SubFlowThread3 waits for a new thread in the thread
pool, then the entire DAG process cannot end, and thus the thread cannot be
released. This forms the state of the child parent process loop waiting. At
this point, the scheduling cluster will no longer be available unless a new
Master is started to add threads to brea [...]
+<p>It seems a bit unsatisfactory to start a new Master to break the deadlock,
so we proposed the following three options to reduce this risk:</p>
+<ol>
+<li>Calculate the sum of the threads of all Masters, and then calculate the
number of threads required for each DAG, that is, pre-calculate before the DAG
process is executed. Because it is a multi-master thread pool, the total number
of threads is unlikely to be obtained in real time.</li>
+<li>Judge the single master thread pool. If the thread pool is full, let the
thread fail directly.</li>
+<li>Add a Command type with insufficient resources. If the thread pool is
insufficient, the main process will be suspended. This way, the thread pool has
a new thread, which can make the process with insufficient resources hang up
and wake up again.</li>
+</ol>
+<p>Note: The Master Scheduler thread is FIFO-enabled when it gets the
Command.</p>
+<p>So we chose the third way to solve the problem of insufficient threads.</p>
+<h5>IV. Fault Tolerant Design</h5>
+<p>Fault tolerance is divided into service fault tolerance and task retry.
Service fault tolerance is divided into two types: Master Fault Tolerance and
Worker Fault Tolerance.</p>
+<h6>1. Downtime fault tolerance</h6>
+<p>Service fault tolerance design relies on ZooKeeper's Watcher mechanism. The
implementation principle is as follows:</p>
+ <p align="center">
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/fault-tolerant.png"
alt="DolphinScheduler Fault Tolerant Design" width="40%" />
+ </p>
+<p>The Master monitors the directories of other Masters and Workers. If the
remove event is detected, the process instance is fault-tolerant or the task
instance is fault-tolerant according to the specific business logic.</p>
+<ul>
+<li>Master fault tolerance flow chart:</li>
+</ul>
+ <p align="center">
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/fault-tolerant_master.png"
alt="Master Fault Tolerance Flowchart" width="40%" />
+ </p>
+<p>After the ZooKeeper Master is fault-tolerant, it is rescheduled by the
Scheduler thread in DolphinScheduler. It traverses the DAG to find the
"Running" and "Submit Successful" tasks, and monitors the
status of its task instance for the "Running" task. You need to
determine whether the Task Queue already exists. If it exists, monitor the
status of the task instance. If it does not exist, resubmit the task
instance.</p>
+<ul>
+<li>Worker fault tolerance flow chart:</li>
+</ul>
+ <p align="center">
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/fault-tolerant_worker.png"
alt="Worker Fault Tolerance Flowchart" width="40%" />
+ </p>
+<p>Once the Master Scheduler thread finds the task instance as "need to
be fault tolerant", it takes over the task and resubmits.</p>
+<p>Note: Because the "network jitter" may cause the node to lose the
heartbeat of ZooKeeper in a short time, the node's remove event occurs. In this
case, we use the easiest way, that is, once the node has timeout connection
with ZooKeeper, it will directly stop the Master or Worker service.</p>
+<h6>2. Task failure retry</h6>
+<p>Here we must first distinguish between the concept of task failure retry,
process failure recovery, and process failure rerun:</p>
+<ul>
+<li>Task failure Retry is task level, which is automatically performed by the
scheduling system. For example, if a shell task sets the number of retries to 3
times, then the shell task will try to run up to 3 times after failing to
run.</li>
+<li>Process failure recovery is process level, is done manually, recovery can
only be performed <strong>from the failed node</strong> or <strong>from the
current node</strong></li>
+<li>Process failure rerun is also process level, is done manually, rerun is
from the start node</li>
+</ul>
+<p>Next, let's talk about the topic, we divided the task nodes in the workflow
into two types.</p>
+<ul>
+<li>One is a business node, which corresponds to an actual script or
processing statement, such as a Shell node, an MR node, a Spark node, a
dependent node, and so on.</li>
+<li>There is also a logical node, which does not do the actual script or
statement processing, but the logical processing of the entire process flow,
such as sub-flow sections.</li>
+</ul>
+<p>Each <strong>service node</strong> can configure the number of failed
retries. When the task node fails, it will automatically retry until it
succeeds or exceeds the configured number of retries. <strong>Logical
node</strong> does not support failed retry. But the tasks in the logical nodes
support retry.</p>
+<p>If there is a task failure in the workflow that reaches the maximum number
of retries, the workflow will fail to stop, and the failed workflow can be
manually rerun or process resumed.</p>
+<h5>V. Task priority design</h5>
+<p>In the early scheduling design, if there is no priority design and fair
scheduling design, it will encounter the situation that the task submitted
first may be completed simultaneously with the task submitted subsequently, but
the priority of the process or task cannot be set. We have redesigned this, and
we are currently designing it as follows:</p>
+<ul>
+<li>
+<p>According to <strong>different process instance priority</strong>
prioritizes <strong>same process instance priority</strong> prioritizes
<strong>task priority within the same process</strong> takes precedence over
<strong>same process</strong> commit order from high Go to low for task
processing.</p>
+<ul>
+<li>
+<p>The specific implementation is to resolve the priority according to the
json of the task instance, and then save the <strong>process instance priority
_ process instance id_task priority _ task id</strong> information in the
ZooKeeper task queue, when obtained from the task queue, Through string
comparison, you can get the task that needs to be executed first.</p>
+<ul>
+<li>
+<p>The priority of the process definition is that some processes need to be
processed before other processes. This can be configured at the start of the
process or at the time of scheduled start. There are 5 levels, followed by
HIGHEST, HIGH, MEDIUM, LOW, and LOWEST. As shown below</p>
+<p align="center">
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/process_priority.png"
alt="Process Priority Configuration" width="40%" />
+ </p>
+</li>
+<li>
+<p>The priority of the task is also divided into 5 levels, followed by
HIGHEST, HIGH, MEDIUM, LOW, and LOWEST. As shown below</p>
+<p align="center">
+ <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/task_priority.png"
alt="task priority configuration" width="35%" />
+ </p>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<h5>VI. Logback and gRPC implement log access</h5>
+<ul>
+<li>Since the Web (UI) and Worker are not necessarily on the same machine,
viewing the log is not as it is for querying local files. There are two options:
+<ul>
+<li>Put the logs on the ES search engine</li>
+<li>Obtain remote log information through gRPC communication</li>
+</ul>
+</li>
+<li>Considering the lightweightness of DolphinScheduler as much as possible,
gRPC was chosen to implement remote access log information.</li>
+</ul>
+ <p align="center">
+ <img src="https://analysys.github.io/easyscheduler_docs_cn/images/grpc.png"
alt="grpc remote access" width="50%" />
+ </p>
+<ul>
+<li>We use a custom Logback FileAppender and Filter function to generate a log
file for each task instance.</li>
+<li>The main implementation of FileAppender is as follows:</li>
+</ul>
+<pre><code class="language-java"> <span class="hljs-comment">/**
+ * task log appender
+ */</span>
+ Public <span class="hljs-class"><span class="hljs-keyword">class</span> <span
class="hljs-title">TaskLogAppender</span> <span
class="hljs-keyword">extends</span> <span
class="hljs-title">FileAppender</span><<span
class="hljs-title">ILoggingEvent</span> </span>{
+
+ ...
+
+ <span class="hljs-meta">@Override</span>
+ <span class="hljs-function">Protected <span
class="hljs-keyword">void</span> <span class="hljs-title">append</span><span
class="hljs-params">(ILoggingEvent event)</span> </span>{
+
+ If (currentlyActiveFile == <span class="hljs-keyword">null</span>){
+ currentlyActiveFile = getFile();
+ }
+ String activeFile = currentlyActiveFile;
+ <span class="hljs-comment">// thread name:
taskThreadName-processDefineId_processInstanceId_taskInstanceId</span>
+ String threadName = event.getThreadName();
+ String[] threadNameArr = threadName.split(<span
class="hljs-string">"-"</span>);
+ <span class="hljs-comment">// logId =
processDefineId_processInstanceId_taskInstanceId</span>
+ String logId = threadNameArr[<span class="hljs-number">1</span>];
+ ...
+ <span class="hljs-keyword">super</span>.subAppend(event);
+ }
+}
+</code></pre>
+<p>Generate a log in the form of /process definition id/process instance
id/task instance id.log</p>
+<ul>
+<li>Filter matches the thread name starting with TaskLogInfo:</li>
+<li>TaskLogFilter is implemented as follows:</li>
+</ul>
+<pre><code class="language-java"> <span class="hljs-comment">/**
+ * task log filter
+ */</span>
+Public <span class="hljs-class"><span class="hljs-keyword">class</span> <span
class="hljs-title">TaskLogFilter</span> <span
class="hljs-keyword">extends</span> <span
class="hljs-title">Filter</span><<span
class="hljs-title">ILoggingEvent</span> </span>{
+
+ <span class="hljs-meta">@Override</span>
+ <span class="hljs-function">Public FilterReply <span
class="hljs-title">decide</span><span class="hljs-params">(ILoggingEvent
event)</span> </span>{
+ If (event.getThreadName().startsWith(<span
class="hljs-string">"TaskLogInfo-"</span>)){
+ Return FilterReply.ACCEPT;
+ }
+ Return FilterReply.DENY;
+ }
+}
+</code></pre>
+<h3>summary</h3>
+<p>Starting from the scheduling, this paper introduces the architecture
principle and implementation ideas of the big data distributed workflow
scheduling system-DolphinScheduler. To be continued</p>
+</div></section><footer class="footer-container"><div class="footer-body"><img
src="/img/ds_gray.svg"/><div class="cols-container"><div class="col
col-12"><h3>Disclaimer</h3><p>Apache DolphinScheduler (incubating) is an effort
undergoing incubation at The Apache Software Foundation (ASF), sponsored by
Incubator.
+Incubation is required of all newly accepted projects until a further review
indicates
+that the infrastructure, communications, and decision making process have
stabilized in a manner consistent with other successful ASF projects.
+While incubation status is not necessarily a reflection of the completeness or
stability of the code,
+it does indicate that the project has yet to be fully endorsed by the
ASF.</p></div><div class="col col-6"><dl><dt>Documentation</dt><dd><a
href="/en-us/docs/1.2.0/user_doc/architecture-design.html"
target="_self">Overview</a></dd><dd><a
href="/en-us/docs/1.2.0/user_doc/quick-start.html" target="_self">Quick
start</a></dd><dd><a href="/en-us/docs/1.2.0/user_doc/backend-development.html"
target="_self">Developer guide</a></dd></dl></div><div class="col
col-6"><dl><dt>ASF</dt><dd><a href=" [...]
+ <script
src="https://f.alicdn.com/react/15.4.1/react-with-addons.min.js"></script>
+ <script
src="https://f.alicdn.com/react/15.4.1/react-dom.min.js"></script>
+ <script>
+ window.rootPath = '';
+ </script>
+ <script src="/build/documentation.js"></script>
+</body>
+</html>
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/architecture-design.json
b/en-us/docs/1.2.1/user_doc/architecture-design.json
new file mode 100644
index 0000000..180bbb4
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/architecture-design.json
@@ -0,0 +1,6 @@
+{
+ "filename": "architecture-design.md",
+ "__html": "<h2>Architecture Design</h2>\n<p>Before explaining the
architecture of the schedule system, let us first understand the common nouns
of the schedule system.</p>\n<h3>1.Noun
Interpretation</h3>\n<p><strong>DAG:</strong> Full name Directed Acyclic
Graph,referred to as DAG。Tasks in the workflow are assembled in the form of
directed acyclic graphs, which are topologically traversed from nodes with zero
indegrees of ingress until there are no successor nodes. For example, the fol
[...]
+ "link": "/en-us/docs/1.2.1/user_doc/architecture-design.html",
+ "meta": {}
+}
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/metadata-1.2.html
b/en-us/docs/1.2.1/user_doc/metadata-1.2.html
new file mode 100644
index 0000000..e5e19cc
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/metadata-1.2.html
@@ -0,0 +1,659 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0,
maximum-scale=1.0, user-scalable=no">
+ <meta name="keywords" content="metadata-1.2" />
+ <meta name="description" content="metadata-1.2" />
+ <!-- 网页标签标题 -->
+ <title>metadata-1.2</title>
+ <link rel="shortcut icon" href="/img/docsite.ico"/>
+ <link rel="stylesheet" href="/build/documentation.css" />
+</head>
+<body>
+ <div id="root"><div class="documentation-page"
data-reactroot=""><header class="header-container header-container-normal"><div
class="header-body"><a href="/en-us/index.html"><img class="logo"
src="/img/hlogo_colorful.svg"/></a><div class="search search-normal"><span
class="icon-search"></span></div><span class="language-switch
language-switch-normal">中</span><div class="header-menu"><img
class="header-menu-toggle" src="/img/system/menu_gray.png"/><div><ul
class="ant-menu blackClass ant [...]
+<p><a name="V5KOl"></a></p>
+<h3>Dolphin Scheduler 1.2 DB Table Overview</h3>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">Table Name</th>
+<th style="text-align:center">Comment</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">t_ds_access_token</td>
+<td style="text-align:center">token for access ds backend</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_alert</td>
+<td style="text-align:center">alert detail</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_alertgroup</td>
+<td style="text-align:center">alert group</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_command</td>
+<td style="text-align:center">command detail</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_datasource</td>
+<td style="text-align:center">data source</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_error_command</td>
+<td style="text-align:center">error command detail</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_process_definition</td>
+<td style="text-align:center">process definition</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_process_instance</td>
+<td style="text-align:center">process instance</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_project</td>
+<td style="text-align:center">project</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_queue</td>
+<td style="text-align:center">queue</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_relation_datasource_user</td>
+<td style="text-align:center">datasource related to user</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_relation_process_instance</td>
+<td style="text-align:center">sub process</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_relation_project_user</td>
+<td style="text-align:center">project related to user</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_relation_resources_user</td>
+<td style="text-align:center">resource related to user</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_relation_udfs_user</td>
+<td style="text-align:center">UDF related to user</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_relation_user_alertgroup</td>
+<td style="text-align:center">alert group related to user</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_resources</td>
+<td style="text-align:center">resource center file</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_schedules</td>
+<td style="text-align:center">process definition schedule</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_session</td>
+<td style="text-align:center">user login session</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_task_instance</td>
+<td style="text-align:center">task instance</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_tenant</td>
+<td style="text-align:center">tenant</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_udfs</td>
+<td style="text-align:center">UDF resource</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_user</td>
+<td style="text-align:center">user detail</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_version</td>
+<td style="text-align:center">ds version</td>
+</tr>
+<tr>
+<td style="text-align:center">t_ds_worker_group</td>
+<td style="text-align:center">worker group</td>
+</tr>
+</tbody>
+</table>
+<hr>
+<p><a name="XCLy1"></a></p>
+<h3>E-R Diagram</h3>
+<p><a name="5hWWZ"></a></p>
+<h4>User Queue DataSource</h4>
+<p><img src="/img/metadata-erd/user-queue-datasource.png" alt="image.png"></p>
+<ul>
+<li>Multiple users can belong to one tenant</li>
+<li>The queue field in t_ds_user table stores the queue_name information in
t_ds_queue table, but t_ds_tenant stores queue information using queue_id.
During the execution of the process definition, the user queue has the highest
priority. If the user queue is empty, the tenant queue is used.</li>
+<li>The user_id field in the t_ds_datasource table indicates the user who
created the data source. The user_id in t_ds_relation_datasource_user indicates
the user who has permission to the data source.
+<a name="7euSN"></a></li>
+</ul>
+<h4>Project Resource Alert</h4>
+<p><img src="/img/metadata-erd/project-resource-alert.png" alt="image.png"></p>
+<ul>
+<li>User can have multiple projects, User project authorization completes the
relationship binding using project_id and user_id in t_ds_relation_project_user
table</li>
+<li>The user_id in the t_ds_project table represents the user who created the
project, and the user_id in the t_ds_relation_project_user table represents
users who have permission to the project</li>
+<li>The user_id in the t_ds_resources table represents the user who created
the resource, and the user_id in t_ds_relation_resources_user represents the
user who has permissions to the resource</li>
+<li>The user_id in the t_ds_udfs table represents the user who created the
UDF, and the user_id in the t_ds_relation_udfs_user table represents a user who
has permission to the UDF
+<a name="JEw4v"></a></li>
+</ul>
+<h4>Command Process Task</h4>
+<p><img src="/img/metadata-erd/command.png" alt="image.png"><br /><img
src="/img/metadata-erd/process-task.png" alt="image.png"></p>
+<ul>
+<li>A project has multiple process definitions, a process definition can
generate multiple process instances, and a process instance can generate
multiple task instances</li>
+<li>The t_ds_schedules table stores the timing schedule information for
process definition</li>
+<li>The data stored in the t_ds_relation_process_instance table is used to
deal with that the process definition contains sub-processes,
parent_process_instance_id field represents the id of the main process instance
containing the child process, process_instance_id field represents the id of
the sub-process instance, parent_task_instance_id field represents the task
instance id of the sub-process node</li>
+<li>The process instance table and the task instance table correspond to the
t_ds_process_instance table and the t_ds_task_instance table, respectively.</li>
+</ul>
+<hr>
+<p><a name="yd79T"></a></p>
+<h3>Core Table Schema</h3>
+<p><a name="6bVhH"></a></p>
+<h4>t_ds_process_definition</h4>
+<table>
+<thead>
+<tr>
+<th>Field</th>
+<th>Type</th>
+<th>Comment</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>id</td>
+<td>int</td>
+<td>primary key</td>
+</tr>
+<tr>
+<td>name</td>
+<td>varchar</td>
+<td>process definition name</td>
+</tr>
+<tr>
+<td>version</td>
+<td>int</td>
+<td>process definition version</td>
+</tr>
+<tr>
+<td>release_state</td>
+<td>tinyint</td>
+<td>process definition release state:0:offline,1:online</td>
+</tr>
+<tr>
+<td>project_id</td>
+<td>int</td>
+<td>project id</td>
+</tr>
+<tr>
+<td>user_id</td>
+<td>int</td>
+<td>process definition creator id</td>
+</tr>
+<tr>
+<td>process_definition_json</td>
+<td>longtext</td>
+<td>process definition json content</td>
+</tr>
+<tr>
+<td>description</td>
+<td>text</td>
+<td>process definition desc</td>
+</tr>
+<tr>
+<td>global_params</td>
+<td>text</td>
+<td>global parameters</td>
+</tr>
+<tr>
+<td>flag</td>
+<td>tinyint</td>
+<td>process is available: 0 not available, 1 available</td>
+</tr>
+<tr>
+<td>locations</td>
+<td>text</td>
+<td>Node location information</td>
+</tr>
+<tr>
+<td>connects</td>
+<td>text</td>
+<td>Node connection information</td>
+</tr>
+<tr>
+<td>receivers</td>
+<td>text</td>
+<td>receivers</td>
+</tr>
+<tr>
+<td>receivers_cc</td>
+<td>text</td>
+<td>carbon copy list</td>
+</tr>
+<tr>
+<td>create_time</td>
+<td>datetime</td>
+<td>create time</td>
+</tr>
+<tr>
+<td>timeout</td>
+<td>int</td>
+<td>timeout</td>
+</tr>
+<tr>
+<td>tenant_id</td>
+<td>int</td>
+<td>tenant id</td>
+</tr>
+<tr>
+<td>update_time</td>
+<td>datetime</td>
+<td>update time</td>
+</tr>
+</tbody>
+</table>
+<p><a name="t5uxM"></a></p>
+<h4>t_ds_process_instance</h4>
+<table>
+<thead>
+<tr>
+<th>Field</th>
+<th>Type</th>
+<th>Comment</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>id</td>
+<td>int</td>
+<td>primary key</td>
+</tr>
+<tr>
+<td>name</td>
+<td>varchar</td>
+<td>process instance name</td>
+</tr>
+<tr>
+<td>process_definition_id</td>
+<td>int</td>
+<td>process definition id</td>
+</tr>
+<tr>
+<td>state</td>
+<td>tinyint</td>
+<td>process instance Status: 0 commit succeeded, 1 running, 2 prepare to
pause, 3 pause, 4 prepare to stop, 5 stop, 6 fail, 7 succeed, 8 need fault
tolerance, 9 kill, 10 wait for thread, 11 wait for dependency to complete</td>
+</tr>
+<tr>
+<td>recovery</td>
+<td>tinyint</td>
+<td>process instance failover flag:0:normal,1:failover instance</td>
+</tr>
+<tr>
+<td>start_time</td>
+<td>datetime</td>
+<td>process instance start time</td>
+</tr>
+<tr>
+<td>end_time</td>
+<td>datetime</td>
+<td>process instance end time</td>
+</tr>
+<tr>
+<td>run_times</td>
+<td>int</td>
+<td>process instance run times</td>
+</tr>
+<tr>
+<td>host</td>
+<td>varchar</td>
+<td>process instance host</td>
+</tr>
+<tr>
+<td>command_type</td>
+<td>tinyint</td>
+<td>command type:0 start ,1 Start from the current node,2 Resume a
fault-tolerant process,3 Resume Pause Process, 4 Execute from the failed node,5
Complement, 6 dispatch, 7 re-run, 8 pause, 9 stop ,10 Resume waiting thread</td>
+</tr>
+<tr>
+<td>command_param</td>
+<td>text</td>
+<td>json command parameters</td>
+</tr>
+<tr>
+<td>task_depend_type</td>
+<td>tinyint</td>
+<td>task depend type. 0: only current node,1:before the node,2:later nodes</td>
+</tr>
+<tr>
+<td>max_try_times</td>
+<td>tinyint</td>
+<td>max try times</td>
+</tr>
+<tr>
+<td>failure_strategy</td>
+<td>tinyint</td>
+<td>failure strategy. 0:end the process when node failed,1:continue running
the other nodes when node failed</td>
+</tr>
+<tr>
+<td>warning_type</td>
+<td>tinyint</td>
+<td>warning type. 0:no warning,1:warning if process succeeds,2:warning if
process fails,3:warning on both success and failure</td>
+</tr>
+<tr>
+<td>warning_group_id</td>
+<td>int</td>
+<td>warning group id</td>
+</tr>
+<tr>
+<td>schedule_time</td>
+<td>datetime</td>
+<td>schedule time</td>
+</tr>
+<tr>
+<td>command_start_time</td>
+<td>datetime</td>
+<td>command start time</td>
+</tr>
+<tr>
+<td>global_params</td>
+<td>text</td>
+<td>global parameters</td>
+</tr>
+<tr>
+<td>process_instance_json</td>
+<td>longtext</td>
+<td>process instance json (a copy of the process definition json)</td>
+</tr>
+<tr>
+<td>flag</td>
+<td>tinyint</td>
+<td>process instance is available: 0 not available, 1 available</td>
+</tr>
+<tr>
+<td>update_time</td>
+<td>timestamp</td>
+<td>update time</td>
+</tr>
+<tr>
+<td>is_sub_process</td>
+<td>int</td>
+<td>whether the process is sub process: 1 sub-process,0 not sub-process</td>
+</tr>
+<tr>
+<td>executor_id</td>
+<td>int</td>
+<td>executor id</td>
+</tr>
+<tr>
+<td>locations</td>
+<td>text</td>
+<td>Node location information</td>
+</tr>
+<tr>
+<td>connects</td>
+<td>text</td>
+<td>Node connection information</td>
+</tr>
+<tr>
+<td>history_cmd</td>
+<td>text</td>
+<td>history commands of process instance operation</td>
+</tr>
+<tr>
+<td>dependence_schedule_times</td>
+<td>text</td>
+<td>depend schedule fire time</td>
+</tr>
+<tr>
+<td>process_instance_priority</td>
+<td>int</td>
+<td>process instance priority. 0 Highest,1 High,2 Medium,3 Low,4 Lowest</td>
+</tr>
+<tr>
+<td>worker_group_id</td>
+<td>int</td>
+<td>worker group id</td>
+</tr>
+<tr>
+<td>timeout</td>
+<td>int</td>
+<td>time out</td>
+</tr>
+<tr>
+<td>tenant_id</td>
+<td>int</td>
+<td>tenant id</td>
+</tr>
+</tbody>
+</table>
+<p><a name="tHZsY"></a></p>
+<h4>t_ds_task_instance</h4>
+<table>
+<thead>
+<tr>
+<th>Field</th>
+<th>Type</th>
+<th>Comment</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>id</td>
+<td>int</td>
+<td>primary key</td>
+</tr>
+<tr>
+<td>name</td>
+<td>varchar</td>
+<td>task name</td>
+</tr>
+<tr>
+<td>task_type</td>
+<td>varchar</td>
+<td>task type</td>
+</tr>
+<tr>
+<td>process_definition_id</td>
+<td>int</td>
+<td>process definition id</td>
+</tr>
+<tr>
+<td>process_instance_id</td>
+<td>int</td>
+<td>process instance id</td>
+</tr>
+<tr>
+<td>task_json</td>
+<td>longtext</td>
+<td>task content json</td>
+</tr>
+<tr>
+<td>state</td>
+<td>tinyint</td>
+<td>Status: 0 commit succeeded, 1 running, 2 prepare to pause, 3 pause, 4
prepare to stop, 5 stop, 6 fail, 7 succeed, 8 need fault tolerance, 9 kill, 10
wait for thread, 11 wait for dependency to complete</td>
+</tr>
+<tr>
+<td>submit_time</td>
+<td>datetime</td>
+<td>task submit time</td>
+</tr>
+<tr>
+<td>start_time</td>
+<td>datetime</td>
+<td>task start time</td>
+</tr>
+<tr>
+<td>end_time</td>
+<td>datetime</td>
+<td>task end time</td>
+</tr>
+<tr>
+<td>host</td>
+<td>varchar</td>
+<td>host of task running on</td>
+</tr>
+<tr>
+<td>execute_path</td>
+<td>varchar</td>
+<td>task execute path in the host</td>
+</tr>
+<tr>
+<td>log_path</td>
+<td>varchar</td>
+<td>task log path</td>
+</tr>
+<tr>
+<td>alert_flag</td>
+<td>tinyint</td>
+<td>whether alert</td>
+</tr>
+<tr>
+<td>retry_times</td>
+<td>int</td>
+<td>task retry times</td>
+</tr>
+<tr>
+<td>pid</td>
+<td>int</td>
+<td>pid of task</td>
+</tr>
+<tr>
+<td>app_link</td>
+<td>varchar</td>
+<td>yarn app id</td>
+</tr>
+<tr>
+<td>flag</td>
+<td>tinyint</td>
+<td>taskinstance is available: 0 not available, 1 available</td>
+</tr>
+<tr>
+<td>retry_interval</td>
+<td>int</td>
+<td>retry interval when task failed</td>
+</tr>
+<tr>
+<td>max_retry_times</td>
+<td>int</td>
+<td>max retry times</td>
+</tr>
+<tr>
+<td>task_instance_priority</td>
+<td>int</td>
+<td>task instance priority:0 Highest,1 High,2 Medium,3 Low,4 Lowest</td>
+</tr>
+<tr>
+<td>worker_group_id</td>
+<td>int</td>
+<td>worker group id</td>
+</tr>
+</tbody>
+</table>
+<p><a name="gLGtm"></a></p>
+<h4>t_ds_command</h4>
+<table>
+<thead>
+<tr>
+<th>Field</th>
+<th>Type</th>
+<th>Comment</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>id</td>
+<td>int</td>
+<td>primary key</td>
+</tr>
+<tr>
+<td>command_type</td>
+<td>tinyint</td>
+<td>Command type: 0 start workflow, 1 start execution from current node, 2
resume fault-tolerant workflow, 3 resume pause process, 4 start execution from
failed node, 5 complement, 6 schedule, 7 rerun, 8 pause, 9 stop, 10 resume
waiting thread</td>
+</tr>
+<tr>
+<td>process_definition_id</td>
+<td>int</td>
+<td>process definition id</td>
+</tr>
+<tr>
+<td>command_param</td>
+<td>text</td>
+<td>json command parameters</td>
+</tr>
+<tr>
+<td>task_depend_type</td>
+<td>tinyint</td>
+<td>Node dependency type: 0 current node, 1 forward, 2 backward</td>
+</tr>
+<tr>
+<td>failure_strategy</td>
+<td>tinyint</td>
+<td>Failed policy: 0 end, 1 continue</td>
+</tr>
+<tr>
+<td>warning_type</td>
+<td>tinyint</td>
+<td>Alarm type: 0 is not sent, 1 process is sent successfully, 2 process is
sent failed, 3 process is sent successfully and all failures are sent</td>
+</tr>
+<tr>
+<td>warning_group_id</td>
+<td>int</td>
+<td>warning group</td>
+</tr>
+<tr>
+<td>schedule_time</td>
+<td>datetime</td>
+<td>schedule time</td>
+</tr>
+<tr>
+<td>start_time</td>
+<td>datetime</td>
+<td>start time</td>
+</tr>
+<tr>
+<td>executor_id</td>
+<td>int</td>
+<td>executor id</td>
+</tr>
+<tr>
+<td>dependence</td>
+<td>varchar</td>
+<td>dependence</td>
+</tr>
+<tr>
+<td>update_time</td>
+<td>datetime</td>
+<td>update time</td>
+</tr>
+<tr>
+<td>process_instance_priority</td>
+<td>int</td>
+<td>process instance priority: 0 Highest,1 High,2 Medium,3 Low,4 Lowest</td>
+</tr>
+<tr>
+<td>worker_group_id</td>
+<td>int</td>
+<td>worker group id</td>
+</tr>
+</tbody>
+</table>
+</div></section><footer class="footer-container"><div class="footer-body"><img
src="/img/ds_gray.svg"/><div class="cols-container"><div class="col
col-12"><h3>Disclaimer</h3><p>Apache DolphinScheduler (incubating) is an effort
undergoing incubation at The Apache Software Foundation (ASF), sponsored by
Incubator.
+Incubation is required of all newly accepted projects until a further review
indicates
+that the infrastructure, communications, and decision making process have
stabilized in a manner consistent with other successful ASF projects.
+While incubation status is not necessarily a reflection of the completeness or
stability of the code,
+it does indicate that the project has yet to be fully endorsed by the
ASF.</p></div><div class="col col-6"><dl><dt>Documentation</dt><dd><a
href="/en-us/docs/1.2.0/user_doc/architecture-design.html"
target="_self">Overview</a></dd><dd><a
href="/en-us/docs/1.2.0/user_doc/quick-start.html" target="_self">Quick
start</a></dd><dd><a href="/en-us/docs/1.2.0/user_doc/backend-development.html"
target="_self">Developer guide</a></dd></dl></div><div class="col
col-6"><dl><dt>ASF</dt><dd><a href=" [...]
+ <script
src="https://f.alicdn.com/react/15.4.1/react-with-addons.min.js"></script>
+ <script
src="https://f.alicdn.com/react/15.4.1/react-dom.min.js"></script>
+ <script>
+ window.rootPath = '';
+ </script>
+ <script src="/build/documentation.js"></script>
+</body>
+</html>
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/metadata-1.2.json
b/en-us/docs/1.2.1/user_doc/metadata-1.2.json
new file mode 100644
index 0000000..29160ad
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/metadata-1.2.json
@@ -0,0 +1,6 @@
+{
+ "filename": "metadata-1.2.md",
+ "__html": "<h1>Dolphin Scheduler 1.2 MetaData</h1>\n<p><a
name=\"V5KOl\"></a></p>\n<h3>Dolphin Scheduler 1.2 DB Table
Overview</h3>\n<table>\n<thead>\n<tr>\n<th style=\"text-align:center\">Table
Name</th>\n<th
style=\"text-align:center\">Comment</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td
style=\"text-align:center\">t_ds_access_token</td>\n<td
style=\"text-align:center\">token for access ds backend</td>\n</tr>\n<tr>\n<td
style=\"text-align:center\">t_ds_alert</td>\n<td style=\"text-align [...]
+ "link": "/en-us/docs/1.2.1/user_doc/metadata-1.2.html",
+ "meta": {}
+}
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/plugin-development.html
b/en-us/docs/1.2.1/user_doc/plugin-development.html
new file mode 100644
index 0000000..e40d1c6
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/plugin-development.html
@@ -0,0 +1,81 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0,
maximum-scale=1.0, user-scalable=no">
+ <meta name="keywords" content="plugin-development" />
+ <meta name="description" content="plugin-development" />
+ <!-- 网页标签标题 -->
+ <title>plugin-development</title>
+ <link rel="shortcut icon" href="/img/docsite.ico"/>
+ <link rel="stylesheet" href="/build/documentation.css" />
+</head>
+<body>
+ <div id="root"><div class="documentation-page"
data-reactroot=""><header class="header-container header-container-normal"><div
class="header-body"><a href="/en-us/index.html"><img class="logo"
src="/img/hlogo_colorful.svg"/></a><div class="search search-normal"><span
class="icon-search"></span></div><span class="language-switch
language-switch-normal">中</span><div class="header-menu"><img
class="header-menu-toggle" src="/img/system/menu_gray.png"/><div><ul
class="ant-menu blackClass ant [...]
+<p>Remind:Currently, task plugin development does not support hot
deployment.</p>
+<h3>Shell-based tasks</h3>
+<h4>YARN-based calculations (see MapReduceTask)</h4>
+<ul>
+<li>Need to be <strong>cn.dolphinscheduler.server.worker.task</strong> Down
<strong>TaskManager</strong> Create a custom task in the class (also need to
register the corresponding task type in TaskType)</li>
+<li>Need to inherit<strong>cn.dolphinscheduler.server.worker.task</strong>
Down <strong>AbstractYarnTask</strong></li>
+<li>Constructor Scheduling <strong>AbstractYarnTask</strong> Construction
method</li>
+<li>Inherit <strong>AbstractParameters</strong> Custom task parameter
entity</li>
+<li>Rewrite <strong>AbstractTask</strong> of <strong>init</strong> Parsing in
method<strong>Custom task parameters</strong></li>
+<li>Rewrite <strong>buildCommand</strong> Encapsulation command</li>
+</ul>
+<h4>Non-YARN-based calculations (see ShellTask)</h4>
+<ul>
+<li>
+<p>Need to be <strong>cn.dolphinscheduler.server.worker.task</strong> Down
<strong>TaskManager</strong> A custom task</p>
+</li>
+<li>
+<p>Need to inherit<strong>cn.dolphinscheduler.server.worker.task</strong> Down
<strong>AbstractTask</strong></p>
+</li>
+<li>
+<p>Instantiation in constructor <strong>ShellCommandExecutor</strong></p>
+<pre><code>public ShellTask(TaskProps props, Logger logger) {
+ super(props, logger);
+
+ this.taskDir = props.getTaskDir();
+
+ this.processTask = new ShellCommandExecutor(this::logHandle,
+ props.getTaskDir(), props.getTaskAppId(),
+ props.getTenantCode(), props.getEnvFile(), props.getTaskStartTime(),
+ props.getTaskTimeout(), logger);
+ this.processDao = DaoFactory.getDaoInstance(ProcessDao.class);
+}
+</code></pre>
+<p>Incoming custom tasks <strong>TaskProps</strong>And
custom<strong>Logger</strong>,TaskProps Encapsulate task information, Logger is
installed with custom log information</p>
+</li>
+<li>
+<p>Inherit <strong>AbstractParameters</strong> Custom task parameter entity</p>
+</li>
+<li>
+<p>Rewrite <strong>AbstractTask</strong> of <strong>init</strong> Parsing in
method<strong>Custom task parameter entity</strong></p>
+</li>
+<li>
+<p>Rewrite <strong>handle</strong> method,transfer
<strong>ShellCommandExecutor</strong> of <strong>run</strong> method,The first
parameter is passed in<strong>command</strong>,Pass the second parameter to
ProcessDao and set the corresponding <strong>exitStatusCode</strong></p>
+</li>
+</ul>
+<h3>Non-SHELL-based tasks (see SqlTask)</h3>
+<ul>
+<li>Need to be <strong>cn.dolphinscheduler.server.worker.task</strong> Down
<strong>TaskManager</strong> A custom task</li>
+<li>Need to inherit<strong>cn.dolphinscheduler.server.worker.task</strong>
Down <strong>AbstractTask</strong></li>
+<li>Inherit <strong>AbstractParameters</strong> Custom task parameter
entity</li>
+<li>Constructor or override <strong>AbstractTask</strong> of
<strong>init</strong> in the method, parse the custom task parameter entity</li>
+<li>Rewrite <strong>handle</strong> Methods to implement business logic and
set the corresponding<strong>exitStatusCode</strong></li>
+</ul>
+</div></section><footer class="footer-container"><div class="footer-body"><img
src="/img/ds_gray.svg"/><div class="cols-container"><div class="col
col-12"><h3>Disclaimer</h3><p>Apache DolphinScheduler (incubating) is an effort
undergoing incubation at The Apache Software Foundation (ASF), sponsored by
Incubator.
+Incubation is required of all newly accepted projects until a further review
indicates
+that the infrastructure, communications, and decision making process have
stabilized in a manner consistent with other successful ASF projects.
+While incubation status is not necessarily a reflection of the completeness or
stability of the code,
+it does indicate that the project has yet to be fully endorsed by the
ASF.</p></div><div class="col col-6"><dl><dt>Documentation</dt><dd><a
href="/en-us/docs/1.2.0/user_doc/architecture-design.html"
target="_self">Overview</a></dd><dd><a
href="/en-us/docs/1.2.0/user_doc/quick-start.html" target="_self">Quick
start</a></dd><dd><a href="/en-us/docs/1.2.0/user_doc/backend-development.html"
target="_self">Developer guide</a></dd></dl></div><div class="col
col-6"><dl><dt>ASF</dt><dd><a href=" [...]
+ <script
src="https://f.alicdn.com/react/15.4.1/react-with-addons.min.js"></script>
+ <script
src="https://f.alicdn.com/react/15.4.1/react-dom.min.js"></script>
+ <script>
+ window.rootPath = '';
+ </script>
+ <script src="/build/documentation.js"></script>
+</body>
+</html>
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/plugin-development.json
b/en-us/docs/1.2.1/user_doc/plugin-development.json
new file mode 100644
index 0000000..d9de6b7
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/plugin-development.json
@@ -0,0 +1,6 @@
+{
+ "filename": "plugin-development.md",
+ "__html": "<h2>Task Plugin Development</h2>\n<p>Remind:Currently, task
plugin development does not support hot deployment.</p>\n<h3>Shell-based
tasks</h3>\n<h4>YARN-based calculations (see
MapReduceTask)</h4>\n<ul>\n<li>Need to be
<strong>cn.dolphinscheduler.server.worker.task</strong> Down
<strong>TaskManager</strong> Create a custom task in the class (also need to
register the corresponding task type in TaskType)</li>\n<li>Need to
inherit<strong>cn.dolphinscheduler.server.worker.task [...]
+ "link": "/en-us/docs/1.2.1/user_doc/plugin-development.html",
+ "meta": {}
+}
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/quick-start.html
b/en-us/docs/1.2.1/user_doc/quick-start.html
new file mode 100644
index 0000000..cc786e3
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/quick-start.html
@@ -0,0 +1,101 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0,
maximum-scale=1.0, user-scalable=no">
+ <meta name="keywords" content="quick-start" />
+ <meta name="description" content="quick-start" />
+ <!-- 网页标签标题 -->
+ <title>quick-start</title>
+ <link rel="shortcut icon" href="/img/docsite.ico"/>
+ <link rel="stylesheet" href="/build/documentation.css" />
+</head>
+<body>
+ <div id="root"><div class="documentation-page"
data-reactroot=""><header class="header-container header-container-normal"><div
class="header-body"><a href="/en-us/index.html"><img class="logo"
src="/img/hlogo_colorful.svg"/></a><div class="search search-normal"><span
class="icon-search"></span></div><span class="language-switch
language-switch-normal">中</span><div class="header-menu"><img
class="header-menu-toggle" src="/img/system/menu_gray.png"/><div><ul
class="ant-menu blackClass ant [...]
+<ul>
+<li>
+<p>Administrator user login</p>
+<blockquote>
+<p>Address:192.168.xx.xx:8888 Username and
password:admin/dolphinscheduler123</p>
+</blockquote>
+</li>
+</ul>
+<p align="center">
+ <img src="/img/login_en.png" width="60%" />
+ </p>
+<ul>
+<li>Create queue</li>
+</ul>
+<p align="center">
+ <img src="/img/create-queue-en.png" width="60%" />
+ </p>
+<ul>
+<li>Create tenant <p align="center">
+<img src="/img/create-tenant-en.png" width="60%" />
+</li>
+</ul>
+ </p>
+<ul>
+<li>Creating Ordinary Users</li>
+</ul>
+<p align="center">
+ <img src="/img/create-user-en.png" width="60%" />
+ </p>
+<ul>
+<li>Create an alarm group</li>
+</ul>
+ <p align="center">
+ <img src="/img/alarm-group-en.png" width="60%" />
+ </p>
+<ul>
+<li>Create a worker group</li>
+</ul>
+ <p align="center">
+ <img src="/img/worker-group-en.png" width="60%" />
+ </p>
+<ul>
+<li>
+<p>Create a token</p>
+<p align="center">
+ <img src="/img/token-en.png" width="60%" />
+ </p>
+</li>
+<li>
+<p>Log in with regular users</p>
+</li>
+</ul>
+<blockquote>
+<p>Click on the user name in the upper right corner to "exit" and
re-use the normal user login.</p>
+</blockquote>
+<ul>
+<li>Project Management - > Create Project - > Click on Project Name</li>
+</ul>
+<p align="center">
+ <img src="/img/create_project_en.png" width="60%" />
+ </p>
+<ul>
+<li>Click Workflow Definition - > Create Workflow Definition - > Online
Process Definition</li>
+</ul>
+<p align="center">
+ <img src="/img/process_definition_en.png" width="60%" />
+ </p>
+<ul>
+<li>Running Process Definition - > Click Workflow Instance - > Click
Process Instance Name - > Double-click Task Node - > View Task Execution
Log</li>
+</ul>
+ <p align="center">
+ <img src="/img/log_en.png" width="60%" />
+</p>
+</div></section><footer class="footer-container"><div class="footer-body"><img
src="/img/ds_gray.svg"/><div class="cols-container"><div class="col
col-12"><h3>Disclaimer</h3><p>Apache DolphinScheduler (incubating) is an effort
undergoing incubation at The Apache Software Foundation (ASF), sponsored by
Incubator.
+Incubation is required of all newly accepted projects until a further review
indicates
+that the infrastructure, communications, and decision making process have
stabilized in a manner consistent with other successful ASF projects.
+While incubation status is not necessarily a reflection of the completeness or
stability of the code,
+it does indicate that the project has yet to be fully endorsed by the
ASF.</p></div><div class="col col-6"><dl><dt>Documentation</dt><dd><a
href="/en-us/docs/1.2.0/user_doc/architecture-design.html"
target="_self">Overview</a></dd><dd><a
href="/en-us/docs/1.2.0/user_doc/quick-start.html" target="_self">Quick
start</a></dd><dd><a href="/en-us/docs/1.2.0/user_doc/backend-development.html"
target="_self">Developer guide</a></dd></dl></div><div class="col
col-6"><dl><dt>ASF</dt><dd><a href=" [...]
+ <script
src="https://f.alicdn.com/react/15.4.1/react-with-addons.min.js"></script>
+ <script
src="https://f.alicdn.com/react/15.4.1/react-dom.min.js"></script>
+ <script>
+ window.rootPath = '';
+ </script>
+ <script src="/build/documentation.js"></script>
+</body>
+</html>
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/quick-start.json
b/en-us/docs/1.2.1/user_doc/quick-start.json
new file mode 100644
index 0000000..6fb2179
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/quick-start.json
@@ -0,0 +1,6 @@
+{
+ "filename": "quick-start.md",
+ "__html": "<h1>Quick Start</h1>\n<ul>\n<li>\n<p>Administrator user
login</p>\n<blockquote>\n<p>Address:192.168.xx.xx:8888 Username and
password:admin/dolphinscheduler123</p>\n</blockquote>\n</li>\n</ul>\n<p
align=\"center\">\n <img src=\"/img/login_en.png\" width=\"60%\" />\n
</p>\n<ul>\n<li>Create queue</li>\n</ul>\n<p align=\"center\">\n <img
src=\"/img/create-queue-en.png\" width=\"60%\" />\n </p>\n<ul>\n<li>Create
tenant <p align=\"center\">\n<img src=\"/img/create-tenant-en. [...]
+ "link": "/en-us/docs/1.2.1/user_doc/quick-start.html",
+ "meta": {}
+}
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/system-manual.html
b/en-us/docs/1.2.1/user_doc/system-manual.html
new file mode 100644
index 0000000..4f4d30a
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/system-manual.html
@@ -0,0 +1,776 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0,
maximum-scale=1.0, user-scalable=no">
+ <meta name="keywords" content="system-manual" />
+ <meta name="description" content="system-manual" />
+ <!-- 网页标签标题 -->
+ <title>system-manual</title>
+ <link rel="shortcut icon" href="/img/docsite.ico"/>
+ <link rel="stylesheet" href="/build/documentation.css" />
+</head>
+<body>
+ <div id="root"><div class="documentation-page"
data-reactroot=""><header class="header-container header-container-normal"><div
class="header-body"><a href="/en-us/index.html"><img class="logo"
src="/img/hlogo_colorful.svg"/></a><div class="search search-normal"><span
class="icon-search"></span></div><span class="language-switch
language-switch-normal">中</span><div class="header-menu"><img
class="header-menu-toggle" src="/img/system/menu_gray.png"/><div><ul
class="ant-menu blackClass ant [...]
+<h2>Operational Guidelines</h2>
+<h3>Home page</h3>
+<p>The homepage contains task status statistics, process status statistics,
and workflow definition statistics for all user projects.</p>
+<p align="center">
+ <img src="/img/home_en.png" width="80%" />
+ </p>
+<h3>Create a project</h3>
+<ul>
+<li>Click "Project - > Create Project", enter project name,
description, and click "Submit" to create a new project.</li>
+<li>Click on the project name to enter the project home page.</li>
+</ul>
+<p align="center">
+ <img src="/img/project_home_en.png" width="80%" />
+ </p>
+<blockquote>
+<p>The project home page contains task status statistics, process status
statistics, and workflow definition statistics for the project.</p>
+</blockquote>
+<ul>
+<li>Task State Statistics: It refers to the statistics of the number of tasks
to be run, failed, running, completed and succeeded in a given time frame.</li>
+<li>Process State Statistics: It refers to the statistics of the number of
waiting, failing, running, completing and succeeding process instances in a
specified time range.</li>
+<li>Process Definition Statistics: The process definition created by the user
and the process definition granted by the administrator to the user are
counted.</li>
+</ul>
+<h3>Creating Process definitions</h3>
+<ul>
+<li>Go to the project home page, click "Process definitions" and
enter the list page of process definition.</li>
+<li>Click "Create process" to create a new process definition.</li>
+<li>Drag the "SHELL" node to the canvas and add a shell task.</li>
+<li>Fill in the Node Name, Description, and Script fields.</li>
+<li>Selecting "task priority" will give priority to high-level tasks
in the execution queue. Tasks with the same priority will be executed in the
first-in-first-out order.</li>
+<li>Timeout alarm. Fill in "Overtime Time". When the task execution
time exceeds the overtime, it can alarm and fail over time.</li>
+<li>Fill in "Custom Parameters" and refer to <a
href="#CustomParameters">Custom Parameters</a><p align="center">
+<img src="/img/process_definitions_en.png" width="80%" />
+ </p>
+</li>
+<li>Increase the order of execution between nodes: click "line
connection". As shown, task 2 and task 3 are executed in parallel. When
task 1 is executed, task 2 and task 3 are executed simultaneously.</li>
+</ul>
+<p align="center">
+ <img src="/img/task_en.png" width="80%" />
+ </p>
+<ul>
+<li>Delete dependencies: Click on the arrow icon to "drag nodes and
select items", select the connection line, click on the delete icon to
delete dependencies between nodes.</li>
+</ul>
+<p align="center">
+ <img src="/img/delete_dependencies_en.png" width="80%" />
+ </p>
+<ul>
+<li>Click "Save", enter the name of the process definition, the
description of the process definition, and set the global parameters.</li>
+</ul>
+<p align="center">
+ <img src="/img/global_parameters_en.png" width="80%" />
+ </p>
+<ul>
+<li>For other types of nodes, refer to <a href="#TaskNodeType">task node types
and parameter settings</a></li>
+</ul>
+<h3>Execution process definition</h3>
+<ul>
+<li><strong>The process definition of the off-line state can be edited, but
not run</strong>, so the on-line workflow is the first step.</li>
+</ul>
+<blockquote>
+<p>Click on the Process definition, return to the list of process definitions,
click on the icon "online", online process definition.</p>
+</blockquote>
+<blockquote>
+<p>Before setting workflow offline, the timed tasks in timed management should
be offline, so that the definition of workflow can be set offline
successfully.</p>
+</blockquote>
+<ul>
+<li>Click "Run" to execute the process. Description of operation
parameters:
+<ul>
+<li>Failure strategy: <strong>the strategy applied to other parallel task nodes when one task node fails to execute</strong>. "Continue" means the other task nodes execute normally; "End" means all running tasks are terminated and the entire process ends.</li>
+<li>Notification strategy:When the process is over, send process execution
information notification mail according to the process status.</li>
+<li>Process priority: The priority of process running is divided into five
levels:the highest, the high, the medium, the low, and the lowest . High-level
processes are executed first in the execution queue, and processes with the
same priority are executed first in first out order.</li>
+<li>Worker group: This process can only be executed in a specified machine
group. Default, by default, can be executed on any worker.</li>
+<li>Notification group: When the process ends or fault tolerance occurs,
process information is sent to all members of the notification group by
mail.</li>
+<li>Recipient: Enter the mailbox and press Enter key to save. When the process
ends and fault tolerance occurs, an alert message is sent to the recipient
list.</li>
+<li>Cc: Enter the mailbox and press Enter key to save. When the process is
over and fault-tolerant occurs, alarm messages are copied to the copier
list.</li>
+</ul>
+</li>
+</ul>
+<p align="center">
+ <img src="/img/start-process-en.png" width="80%" />
+ </p>
+<ul>
+<li>Complement: To implement the workflow definition of a specified date, you
can select the time range of the complement (currently only support for
continuous days), such as the data from May 1 to May 10, as shown in the
figure:</li>
+</ul>
+<p align="center">
+ <img src="/img/complement-en.png" width="80%" />
+ </p>
+<blockquote>
+<p>Complement execution mode includes serial execution and parallel execution.
In serial mode, the complement will be executed sequentially from May 1 to May
10. In parallel mode, the tasks from May 1 to May 10 will be executed
simultaneously.</p>
+</blockquote>
+<h3>Timing Process Definition</h3>
+<ul>
+<li>Create Timing: "Process Definition - > Timing"</li>
+<li>Choose start-stop time, in the start-stop time range, regular normal work,
beyond the scope, will not continue to produce timed workflow instances.</li>
+</ul>
+<p align="center">
+ <img src="/img/timing-en.png" width="80%" />
+ </p>
+<ul>
+<li>Add a timer to be executed once a day at 5:00 a.m. as shown below:</li>
+</ul>
+<p align="center">
+ <img src="/img/timer-en.png" width="80%" />
+ </p>
+<ul>
+<li>Timely online,<strong>the newly created timer is offline. You need to
click "Timing Management - >online" to work properly.</strong></li>
+</ul>
+<h3>View process instances</h3>
+<blockquote>
+<p>Click on "Process Instances" to view the list of process
instances.</p>
+</blockquote>
+<blockquote>
+<p>Click on the process name to see the status of task execution.</p>
+</blockquote>
+ <p align="center">
+ <img src="/img/process-instances-en.png" width="80%" />
+ </p>
+<blockquote>
+<p>Click on the task node, click "View Log" to view the task
execution log.</p>
+</blockquote>
+ <p align="center">
+ <img src="/img/view-log-en.png" width="80%" />
+ </p>
+<blockquote>
+<p>Click on the task instance node, click <strong>View History</strong> to
view the list of task instances that the process instance runs.</p>
+</blockquote>
+ <p align="center">
+ <img src="/img/instance-runs-en.png" width="80%" />
+ </p>
+<blockquote>
+<p>Operations on workflow instances:</p>
+</blockquote>
+<p align="center">
+ <img src="/img/workflow-instances-en.png" width="80%" />
+</p>
+<ul>
+<li>Editor: You can edit the terminated process. When you save it after
editing, you can choose whether to update the process definition or not.</li>
+<li>Rerun: A process that has been terminated can be re-executed.</li>
+<li>Recovery failure: For a failed process, a recovery failure operation can
be performed, starting at the failed node.</li>
+<li>Stop: Stop the running process, the background will <code>kill</code> the worker process first, then perform a <code>kill -9</code> operation.</li>
+<li>Pause:The running process can be <strong>suspended</strong>, the system
state becomes <strong>waiting to be executed</strong>, waiting for the end of
the task being executed, and suspending the next task to be executed.</li>
+<li>Restore pause: <strong>The suspended process</strong> can be restored and
run directly from the suspended node</li>
+<li>Delete: Delete process instances and task instances under process
instances</li>
+<li>Gantt diagram: The vertical axis of Gantt diagram is the topological
ordering of task instances under a process instance, and the horizontal axis is
the running time of task instances, as shown in the figure:</li>
+</ul>
+<p align="center">
+ <img src="/img/gantt-en.png" width="80%" />
+</p>
+<h3>View task instances</h3>
+<blockquote>
+<p>Click on "Task Instance" to enter the Task List page and query
the performance of the task.</p>
+</blockquote>
+<p align="center">
+ <img src="/img/task-instances-en.png" width="80%" />
+</p>
+<blockquote>
+<p>Click "View Log" in the action column to view the log of task
execution.</p>
+</blockquote>
+<p align="center">
+ <img src="/img/task-execution-en.png" width="80%" />
+</p>
+<h3>Create data source</h3>
+<blockquote>
+<p>Data Source Center supports MySQL, POSTGRESQL, HIVE and Spark data
sources.</p>
+</blockquote>
+<h4>Create and edit MySQL data source</h4>
+<ul>
+<li>Click on "Datasource - > Create Datasources" to create
different types of datasources according to requirements.</li>
+<li>Datasource: Select MYSQL</li>
+<li>Datasource Name: Name of Input Datasource</li>
+<li>Description: Description of input datasources</li>
+<li>IP: Enter the IP to connect to MySQL</li>
+<li>Port: Enter the port to connect MySQL</li>
+<li>User name: Set the username to connect to MySQL</li>
+<li>Password: Set the password to connect to MySQL</li>
+<li>Database name: Enter the name of the database connecting MySQL</li>
+<li>Jdbc connection parameters: parameter settings for MySQL connections,
filled in as JSON</li>
+</ul>
+<p align="center">
+ <img src="/img/mysql-en.png" width="80%" />
+ </p>
+<blockquote>
+<p>Click "Test Connect" to test whether the data source can be
successfully connected.</p>
+</blockquote>
+<h4>Create and edit POSTGRESQL data source</h4>
+<ul>
+<li>Datasource: Select POSTGRESQL</li>
+<li>Datasource Name: Name of Input Data Source</li>
+<li>Description: Description of input data sources</li>
+<li>IP: Enter IP to connect to POSTGRESQL</li>
+<li>Port: Input port to connect POSTGRESQL</li>
+<li>Username: Set the username to connect to POSTGRESQL</li>
+<li>Password: Set the password to connect to POSTGRESQL</li>
+<li>Database name: Enter the name of the database connecting to POSTGRESQL</li>
+<li>Jdbc connection parameters: parameter settings for POSTGRESQL connections,
filled in as JSON</li>
+</ul>
+<p align="center">
+ <img src="/img/create-datasource-en.png" width="80%" />
+ </p>
+<h4>Create and edit HIVE data source</h4>
+<p>1.Connect with HiveServer 2</p>
+ <p align="center">
+ <img src="/img/hive-en.png" width="80%" />
+ </p>
+<ul>
+<li>Datasource: Select HIVE</li>
+<li>Datasource Name: Name of Input Datasource</li>
+<li>Description: Description of input datasources</li>
+<li>IP: Enter IP to connect to HIVE</li>
+<li>Port: Input port to connect to HIVE</li>
+<li>Username: Set the username to connect to HIVE</li>
+<li>Password: Set the password to connect to HIVE</li>
+<li>Database Name: Enter the name of the database connecting to HIVE</li>
+<li>Jdbc connection parameters: parameter settings for HIVE connections, filled in as JSON</li>
+</ul>
+<p>2.Connect using Hive Server 2 HA Zookeeper mode</p>
+ <p align="center">
+ <img src="/img/zookeeper-en.png" width="80%" />
+ </p>
+<p>Note: If <strong>kerberos</strong> is turned on, you need to fill in
<strong>Principal</strong></p>
+<p align="center">
+ <img src="/img/principal-en.png" width="80%" />
+ </p>
+<h4>Create and Edit Spark Datasource</h4>
+<p align="center">
+ <img src="/img/edit-datasource-en.png" width="80%" />
+ </p>
+<ul>
+<li>Datasource: Select Spark</li>
+<li>Datasource Name: Name of Input Datasource</li>
+<li>Description: Description of input datasources</li>
+<li>IP: Enter the IP to connect to Spark</li>
+<li>Port: Input port to connect Spark</li>
+<li>Username: Set the username to connect to Spark</li>
+<li>Password: Set the password to connect to Spark</li>
+<li>Database name: Enter the name of the database connecting to Spark</li>
+<li>Jdbc Connection Parameters: Parameter settings for Spark Connections,
filled in as JSON</li>
+</ul>
+<p>Note: If <strong>Kerberos</strong> is turned on, you need to fill in <strong>Principal</strong></p>
+<p align="center">
+ <img src="/img/kerberos-en.png" width="80%" />
+ </p>
+<h3>Upload Resources</h3>
+<ul>
+<li>Upload resource files and udf functions, all uploaded files and resources
will be stored on hdfs, so the following configuration items are required:</li>
+</ul>
+<pre><code>conf/common/common.properties
+ # Users who have permission to create directories under the HDFS root path
+ hdfs.root.user=hdfs
+ # data base dir, resource file will store to this hadoop hdfs path, self
configuration, please make sure the directory exists on hdfs and have read
write permissions。"/escheduler" is recommended
+ data.store2hdfs.basepath=/dolphinscheduler
+ # resource upload startup type : HDFS,S3,NONE
+ res.upload.startup.type=HDFS
+ # whether kerberos starts
+ hadoop.security.authentication.startup.state=false
+ # java.security.krb5.conf path
+ java.security.krb5.conf.path=/opt/krb5.conf
+ # loginUserFromKeytab user
+ [email protected]
+ # loginUserFromKeytab path
+ login.user.keytab.path=/opt/hdfs.headless.keytab
+
+conf/common/hadoop.properties
+ # ha or single namenode,If namenode ha needs to copy core-site.xml and
hdfs-site.xml
+ # to the conf directory,support s3,for example : s3a://dolphinscheduler
+ fs.defaultFS=hdfs://mycluster:8020
+ #resourcemanager ha note this need ips , this empty if single
+ yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx
+ # If it is a single resourcemanager, you only need to configure one host
name. If it is resourcemanager HA, the default configuration is fine
+ yarn.application.status.address=http://xxxx:8088/ws/v1/cluster/apps/%s
+
+</code></pre>
+<ul>
+<li>yarn.resourcemanager.ha.rm.ids and yarn.application.status.address only
need to configure one address, and the other address is empty.</li>
+<li>You need to copy core-site.xml and hdfs-site.xml from the conf directory
of the Hadoop cluster to the conf directory of the dolphinscheduler project and
restart the api-server service.</li>
+</ul>
+<h4>File Manage</h4>
+<blockquote>
+<p>It is the management of various resource files, including creating basic
txt/log/sh/conf files, uploading jar packages and other types of files,
editing, downloading, deleting and other operations.</p>
+<p align="center">
+ <img src="/img/file-manage-en.png" width="80%" />
+</p>
+</blockquote>
+<ul>
+<li>Create file</li>
+</ul>
+<blockquote>
+<p>File formats support the following
types:txt、log、sh、conf、cfg、py、java、sql、xml、hql</p>
+</blockquote>
+<p align="center">
+ <img src="/img/create-file.png" width="80%" />
+ </p>
+<ul>
+<li>Upload Files</li>
+</ul>
+<blockquote>
+<p>Upload Files: Click the Upload button to upload, drag the file to the
upload area, and the file name will automatically complete the uploaded file
name.</p>
+</blockquote>
+<p align="center">
+ <img src="/img/file-upload-en.png" width="80%" />
+ </p>
+<ul>
+<li>File View</li>
+</ul>
+<blockquote>
+<p>For viewable file types, click on the file name to view file details</p>
+</blockquote>
+<p align="center">
+ <img src="/img/file-view-en.png" width="80%" />
+ </p>
+<ul>
+<li>Download files</li>
+</ul>
+<blockquote>
+<p>You can download a file by clicking the download button in the top right
corner of the file details, or by downloading the file under the download
button after the file list.</p>
+</blockquote>
+<ul>
+<li>File rename</li>
+</ul>
+<p align="center">
+ <img src="/img/rename-en.png" width="80%" />
+ </p>
+<h4>Delete</h4>
+<blockquote>
+<p>File List - > Click the Delete button to delete the specified file</p>
+</blockquote>
+<h4>Resource management</h4>
+<blockquote>
+<p>Resource management and file management functions are similar. The difference is that resource management is for uploading UDF functions, while file management is for uploading user programs, scripts and configuration files.</p>
+</blockquote>
+<ul>
+<li>Upload UDF resources</li>
+</ul>
+<blockquote>
+<p>The same as uploading files.</p>
+</blockquote>
+<h4>Function management</h4>
+<ul>
+<li>Create UDF Functions</li>
+</ul>
+<blockquote>
+<p>Click "Create UDF Function", enter parameters of udf function,
select UDF resources, and click "Submit" to create udf function.</p>
+<p>Currently only temporary udf functions for HIVE are supported</p>
+<ul>
+<li>UDF function name: name when entering UDF Function</li>
+<li>Package Name: Full Path of Input UDF Function</li>
+<li>Parameter: Input parameters used to annotate functions</li>
+<li>Database Name: Reserved Field for Creating Permanent UDF Functions</li>
+<li>UDF Resources: Set up the resource files corresponding to the created
UDF</li>
+</ul>
+</blockquote>
+<p align="center">
+ <img src="/img/udf-function.png" width="80%" />
+ </p>
+<h2>Security</h2>
+<ul>
+<li>The security has the functions of queue management, tenant management,
user management, warning group management, worker group manager, token manage
and other functions. It can also authorize resources, data sources, projects,
etc.</li>
+<li>Administrator login, default username password:
admin/dolphinscheduler123</li>
+</ul>
+<h3>Create queues</h3>
+<ul>
+<li>Queues are used to execute spark, mapreduce and other programs, which
require the use of "queue" parameters.</li>
+<li>"Security" - > "Queue Manage" - > "Create
Queue" <p align="center">
+ <img src="/img/create-queue-en.png" width="80%" />
+</p>
+</li>
+</ul>
+<h3>Create Tenants</h3>
+<ul>
+<li>The tenant corresponds to the account of Linux, which is used by the
worker server to submit jobs. If Linux does not have this user, the worker
would create the account when executing the task.</li>
+<li>Tenant Code:<strong>the tenant code is the only account on Linux that
can't be duplicated.</strong></li>
+</ul>
+ <p align="center">
+ <img src="/img/create-tenant-en.png" width="80%" />
+ </p>
+<h3>Create Ordinary Users</h3>
+<ul>
+<li>User types are <strong>ordinary users</strong> and <strong>administrator users</strong>.</li>
+</ul>
+<pre><code>* Administrators have **authorization and user management**
privileges, and no privileges to **create project and process-defined
operations**.
+* Ordinary users can **create projects and create, edit, and execute process
definitions**.
+* Note: **If the user switches the tenant, all resources under the tenant will
be copied to the switched new tenant.**
+</code></pre>
+<p align="center">
+ <img src="/img/create-user-en.png" width="80%" />
+ </p>
+<h3>Create alarm group</h3>
+<ul>
+<li>The alarm group is a parameter set at start-up. After the process is
finished, the status of the process and other information will be sent to the
alarm group by mail.</li>
+<li>New and Editorial Warning Group<p align="center">
+<img src="/img/alarm-group-en.png" width="80%" />
+</p>
+</li>
+</ul>
+<h3>Create Worker Group</h3>
+<ul>
+<li>
+<p>Worker group provides a mechanism for tasks to run on a specified worker.
Administrators create worker groups, which can be specified in task nodes and
operation parameters. If the specified grouping is deleted or no grouping is
specified, the task will run on any worker.</p>
+</li>
+<li>
+<p>Multiple IP addresses within a worker group (<strong>aliases can not be
written</strong>), separated by <strong>commas in English</strong></p>
+<p align="center">
+ <img src="/img/worker-group-en.png" width="80%" />
+</p>
+</li>
+</ul>
+<h3>Token manage</h3>
+<ul>
+<li>Because the back-end interface has login check and token management, it
provides a way to operate the system by calling the interface.<p align="center">
+ <img src="/img/token-en.png" width="80%" />
+</p>
+</li>
+<li>Call examples:</li>
+</ul>
+<pre><code class="language-令牌调用示例"> /**
+ * test token
+ */
+ public void doPOSTParam()throws Exception{
+ // create HttpClient
+ CloseableHttpClient httpclient = HttpClients.createDefault();
+
+ // create http post request
+ HttpPost httpPost = new
HttpPost("http://127.0.0.1:12345/dolphinscheduler/projects/create");
+ httpPost.setHeader("token", "123");
+ // set parameters
+ List<NameValuePair> parameters = new
ArrayList<NameValuePair>();
+ parameters.add(new BasicNameValuePair("projectName",
"qzw"));
+ parameters.add(new BasicNameValuePair("desc",
"qzw"));
+ UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(parameters);
+ httpPost.setEntity(formEntity);
+ CloseableHttpResponse response = null;
+ try {
+ // execute
+ response = httpclient.execute(httpPost);
+ // response status code 200
+ if (response.getStatusLine().getStatusCode() == 200) {
+ String content = EntityUtils.toString(response.getEntity(),
"UTF-8");
+ System.out.println(content);
+ }
+ } finally {
+ if (response != null) {
+ response.close();
+ }
+ httpclient.close();
+ }
+ }
+</code></pre>
+<h3>Grant authority</h3>
+<ul>
+<li>Granting permissions includes project permissions, resource permissions,
datasource permissions, UDF Function permissions.</li>
+</ul>
+<blockquote>
+<p>Administrators can authorize projects, resources, data sources and UDF
Functions that are not created by ordinary users. Because project, resource,
data source and UDF Function are all authorized in the same way, the project
authorization is introduced as an example.</p>
+</blockquote>
+<blockquote>
+<p>Note:For projects created by the user himself, the user has all the
permissions. The list of items and the list of selected items will not be
reflected</p>
+</blockquote>
+<ul>
+<li>1.Click on the authorization button of the designated person as follows:<p
align="center">
+ <img src="/img/operation-en.png" width="80%" />
+</li>
+</ul>
+ </p>
+<ul>
+<li>2.Select the project button to authorize the project</li>
+</ul>
+<p align="center">
+ <img src="/img/auth-project-en.png" width="80%" />
+ </p>
+<h3>Monitor center</h3>
+<ul>
+<li>Service management is mainly to monitor and display the health status and
basic information of each service in the system.</li>
+</ul>
+<h4>Master monitor</h4>
+<ul>
+<li>Mainly related information about master.</li>
+</ul>
+<p align="center">
+ <img src="/img/master-monitor-en.png" width="80%" />
+ </p>
+<h4>Worker monitor</h4>
+<ul>
+<li>Mainly related information of worker.</li>
+</ul>
+<p align="center">
+ <img src="/img/worker-monitor-en.png" width="80%" />
+ </p>
+<h4>Zookeeper monitor</h4>
+<ul>
+<li>Mainly the configuration information of each worker and master in zookeeper.</li>
+</ul>
+<p align="center">
+ <img src="/img/zookeeper-monitor-en.png" width="80%" />
+ </p>
+<h4>DB monitor</h4>
+<ul>
+<li>Mainly the health status of DB</li>
+</ul>
+<p align="center">
+ <img src="/img/db-monitor-en.png" width="80%" />
+ </p>
+<h4>statistics Manage</h4>
+ <p align="center">
+ <img src="/img/statistics-en.png" width="80%" />
+ </p>
+<ul>
+<li>Commands to be executed: statistics on t_ds_command table</li>
+<li>Number of commands that failed to execute: statistics on the
t_ds_error_command table</li>
+<li>Number of tasks to run: statistics of task_queue data in zookeeper</li>
+<li>Number of tasks to be killed: statistics of task_kill in zookeeper</li>
+</ul>
+<h2><span id=TaskNodeType>Task Node Type and Parameter Setting</span></h2>
+<h3>Shell</h3>
+<ul>
+<li>The shell node, when the worker executes, generates a temporary shell
script, which is executed by a Linux user with the same name as the tenant.</li>
+</ul>
+<blockquote>
+<p>Drag the <img
src="https://analysys.github.io/easyscheduler_docs/images/toolbar_SHELL.png"
alt="PNG"> task node in the toolbar onto the palette and double-click the task
node as follows:</p>
+</blockquote>
+<p align="center">
+ <img src="/img/shell-en.png" width="80%" />
+ </p>
+<ul>
+<li>Node name: The node name in a process definition is unique</li>
+<li>Run flag: Identify whether the node can be scheduled properly, and if it
does not need to be executed, you can turn on the forbidden execution
switch.</li>
+<li>Description : Describes the function of the node</li>
+<li>Number of failed retries: Number of failed task submissions, support
drop-down and manual filling</li>
+<li>Failure Retry Interval: Interval between tasks that fail to resubmit
tasks, support drop-down and manual filling</li>
+<li>Script: User-developed SHELL program</li>
+<li>Resources: A list of resource files that need to be invoked in a
script</li>
+<li>Custom parameters: User-defined parameters that are part of SHELL replace
the contents of scripts with ${variables}</li>
+</ul>
+<h3>SUB_PROCESS</h3>
+<ul>
+<li>The sub-process node is to execute an external workflow definition as a task node.</li>
+</ul>
+<blockquote>
+<p>Drag the <img
src="https://analysys.github.io/easyscheduler_docs_cn/images/toolbar_SUB_PROCESS.png"
alt="PNG"> task node in the toolbar onto the palette and double-click the task
node as follows:</p>
+</blockquote>
+<p align="center">
+ <img src="/img/sub-process-en.png" width="80%" />
+ </p>
+<ul>
+<li>Node name: The node name in a process definition is unique</li>
+<li>Run flag: Identify whether the node is scheduled properly</li>
+<li>Description: Describes the function of the node</li>
+<li>Sub-node: The process definition of the selected sub-process is selected,
and the process definition of the selected sub-process can be jumped to by
entering the sub-node in the upper right corner.</li>
+</ul>
+<h3>DEPENDENT</h3>
+<ul>
+<li>Dependent nodes are <strong>dependent checking nodes</strong>. For
example, process A depends on the successful execution of process B yesterday,
and the dependent node checks whether process B has a successful execution
instance yesterday.</li>
+</ul>
+<blockquote>
+<p>Drag the <img
src="https://analysys.github.io/easyscheduler_docs/images/toolbar_DEPENDENT.png"
alt="PNG"> task node in the toolbar onto the palette and double-click the task
node as follows:</p>
+</blockquote>
+<p align="center">
+ <img src="/img/current-node-en.png" width="80%" />
+ </p>
+<blockquote>
+<p>Dependent nodes provide logical judgment functions, such as checking
whether yesterday's B process was successful or whether the C process was
successfully executed.</p>
+</blockquote>
+ <p align="center">
+ <img src="/img/weekly-A-en.png" width="80%" />
+ </p>
+<blockquote>
+<p>For example, process A is a weekly task and process B and C are daily
tasks. Task A requires that task B and C be successfully executed every day of
the last week, as shown in the figure:</p>
+</blockquote>
+ <p align="center">
+ <img src="/img/weekly-A1-en.png" width="80%" />
+ </p>
+<blockquote>
+<p>If weekly A also needs to be implemented successfully on Tuesday:</p>
+</blockquote>
+ <p align="center">
+ <img src="/img/weekly-A2-en.png" width="80%" />
+ </p>
+<h3>PROCEDURE</h3>
+<ul>
+<li>The procedure is executed according to the selected data source.</li>
+</ul>
+<blockquote>
+<p>Drag the <img
src="https://analysys.github.io/easyscheduler_docs/images/toolbar_PROCEDURE.png"
alt="PNG"> task node in the toolbar onto the palette and double-click the task
node as follows:</p>
+</blockquote>
+<p align="center">
+ <img src="/img/node-setting-en.png" width="80%" />
+ </p>
+<ul>
+<li>Datasource: The data source type of stored procedure supports MySQL and
POSTGRESQL, and chooses the corresponding data source.</li>
+<li>Method: The method name of the stored procedure</li>
+<li>Custom parameters: Custom parameter types of stored procedures support IN
and OUT, and data types support nine data types: VARCHAR, INTEGER, LONG, FLOAT,
DOUBLE, DATE, TIME, TIMESTAMP and BOOLEAN.</li>
+</ul>
+<h3>SQL</h3>
+<ul>
+<li>Drag the <img
src="https://analysys.github.io/easyscheduler_docs/images/toolbar_SQL.png"
alt="PNG"> task node in the toolbar onto the palette.</li>
+<li>Execute non-query SQL functionality<p align="center">
+ <img src="/img/dependent-nodes-en.png" width="80%" />
+</li>
+</ul>
+ </p>
+<ul>
+<li>Executing the query SQL function, you can choose to send mail in the form
of tables and attachments to the designated recipients.</li>
+</ul>
+<p align="center">
+ <img src="/img/double-click-en.png" width="80%" />
+ </p>
+<ul>
+<li>Datasource: Select the corresponding datasource</li>
+<li>sql type: support query and non-query, query is select type query, there
is a result set returned, you can specify mail notification as table,
attachment or table attachment three templates. Non-query is not returned by
result set, and is for update, delete, insert three types of operations</li>
+<li>sql parameter: input parameter format is key1 = value1; key2 =
value2...</li>
+<li>sql statement: SQL statement</li>
+<li>UDF function: For HIVE type data sources, you can refer to UDF functions
created in the resource center, other types of data sources do not support UDF
functions for the time being.</li>
+<li>Custom parameters: SQL task type, and stored procedure is to customize the
order of parameters to set values for methods. Custom parameter type and data
type are the same as stored procedure task type. The difference is that the
custom parameter of the SQL task type replaces the ${variable} in the SQL
statement.</li>
+<li>Pre Statement: Pre-sql is executed before the sql statement</li>
+<li>Post Statement: Post-sql is executed after the sql statement</li>
+</ul>
+<h3>SPARK</h3>
+<ul>
+<li>Through SPARK node, SPARK program can be directly executed. For spark
node, worker will use <code>spark-submit</code> mode to submit tasks.</li>
+</ul>
+<blockquote>
+<p>Drag the <img
src="https://analysys.github.io/easyscheduler_docs/images/toolbar_SPARK.png"
alt="PNG"> task node in the toolbar onto the palette and double-click the task
node as follows:</p>
+</blockquote>
+<p align="center">
+ <img src="/img/spark-submit-en.png" width="80%" />
+ </p>
+<ul>
+<li>Program Type: Support JAVA, Scala and Python</li>
+<li>Class of the main function: The full path of Main Class, the entry to the
Spark program</li>
+<li>Master jar package: It's Spark's jar package</li>
+<li>Deployment: support three modes: yarn-cluster, yarn-client, and local</li>
+<li>Driver Kernel Number: Driver Kernel Number and Memory Number can be
set</li>
+<li>Executor Number: Executor Number, Executor Memory Number and Executor
Kernel Number can be set</li>
+<li>Command Line Parameters: Setting the input parameters of Spark program to
support the replacement of custom parameter variables.</li>
+<li>Other parameters: support - jars, - files, - archives, - conf format</li>
+<li>Resource: If a resource file is referenced in other parameters, you need
to select the specified resource.</li>
+<li>Custom parameters: Local user-defined parameters of the SPARK task that
replace the contents in scripts with ${variables}</li>
+</ul>
+<p>Note: JAVA and Scala are only used for identification; there is no
difference between them. If the Spark program is developed in Python, there is
no class of the main function, and everything else is the same.</p>
+<h3>MapReduce(MR)</h3>
+<ul>
+<li>Using MR nodes, MR programs can be executed directly. For Mr nodes, worker
submits tasks using <code>hadoop jar</code></li>
+</ul>
+<blockquote>
+<p>Drag the <img
src="https://analysys.github.io/easyscheduler_docs/images/toolbar_MR.png"
alt="PNG"> task node in the toolbar onto the palette and double-click the task
node as follows:</p>
+</blockquote>
+<ol>
+<li>JAVA program</li>
+</ol>
+ <p align="center">
+ <img src="/img/java-program-en.png" width="80%" />
+ </p>
+<ul>
+<li>Class of the main function: The full path of the MR program's entry Main
Class</li>
+<li>Program Type: Select JAVA Language</li>
+<li>Master jar package: MR jar package</li>
+<li>Command Line Parameters: Setting the input parameters of MR program to
support the replacement of custom parameter variables</li>
+<li>Other parameters: support - D, - files, - libjars, - archives format</li>
+<li>Resource: If a resource file is referenced in other parameters, you need
to select the specified resource.</li>
+<li>Custom parameters: User-defined parameters in MR locality that replace the
contents in scripts with ${variables}</li>
+</ul>
+<ol start="2">
+<li>Python program</li>
+</ol>
+<p align="center">
+ <img src="/img/python-program-en.png" width="80%" />
+ </p>
+<ul>
+<li>Program Type: Select Python Language</li>
+<li>Main jar package: Python jar package running MR</li>
+<li>Other parameters: support - D, - mapper, - reducer, - input - output
format, where user-defined parameters can be set, such as:</li>
+<li>-mapper &quot;mapper.py 1&quot; -file mapper.py -reducer reducer.py -file
reducer.py -input /journey/words.txt -output
/journey/out/mr/${currentTimeMillis}</li>
+<li>Among them, &quot;mapper.py 1&quot; after -mapper is two parameters: the first
parameter is mapper.py, and the second parameter is 1.</li>
+<li>Resource: If a resource file is referenced in other parameters, you need
to select the specified resource.</li>
+<li>Custom parameters: User-defined parameters in MR locality that replace the
contents in scripts with ${variables}</li>
+</ul>
+<h3>Python</h3>
+<ul>
+<li>With Python nodes, Python scripts can be executed directly. For Python
nodes, worker will use <code>python **</code> to submit tasks.</li>
+</ul>
+<blockquote>
+<p>Drag the <img
src="https://analysys.github.io/easyscheduler_docs/images/toolbar_PYTHON.png"
alt="PNG"> task node in the toolbar onto the palette and double-click the task
node as follows:</p>
+</blockquote>
+<p align="center">
+ <img src="/img/python-en.png" width="80%" />
+ </p>
+<ul>
+<li>Script: User-developed Python program</li>
+<li>Resource: A list of resource files that need to be invoked in a script</li>
+<li>Custom parameters: User-defined parameters that are part of Python that
replace the contents in the script with ${variables}</li>
+</ul>
+<h3>System parameter</h3>
+<table>
+ <tr><th>variable</th><th>meaning</th></tr>
+ <tr>
+ <td>${system.biz.date}</td>
+        <td>The scheduled time of a routine scheduling instance is the day
before, in yyyyMMdd format. When supplementing data, the date is + 1</td>
+ </tr>
+ <tr>
+ <td>${system.biz.curdate}</td>
+        <td>Daily scheduling instance scheduled time, in yyyyMMdd format;
when supplementing data, the date is + 1</td>
+ </tr>
+ <tr>
+ <td>${system.datetime}</td>
+        <td>Daily scheduling instance scheduled time, in yyyyMMddHHmmss
format; when supplementing data, the date is + 1</td>
+ </tr>
+</table>
+<h3>Time Customization Parameters</h3>
+<ul>
+<li>
+<p>Support code to customize the variable name, declaration: ${variable name}.
It can refer to "system parameters" or specify
"constants".</p>
+</li>
+<li>
+<p>When we define this benchmark variable as $[...], $[yyyyMMddHHmmss] can be
decomposed and combined arbitrarily, such as: $[yyyyMMdd], $[HHmmss],
$[yyyy-MM-dd], etc.</p>
+</li>
+<li>
+<p>Can also do this:</p>
+<ul>
+<li>Later N years: $[add_months(yyyyMMdd, 12*N)]</li>
+<li>The previous N years: $[add_months(yyyyMMdd, -12*N)]</li>
+<li>Later N months: $[add_months(yyyyMMdd, N)]</li>
+<li>The first N months: $[add_months(yyyyMMdd, -N)]</li>
+<li>Later N weeks: $[yyyyMMdd + 7*N]</li>
+<li>The first N weeks: $[yyyyMMdd - 7*N]</li>
+<li>Later N days: $[yyyyMMdd + N]</li>
+<li>The first N days: $[yyyyMMdd - N]</li>
+<li>Later N hours: $[HHmmss + N/24]</li>
+<li>First N hours: $[HHmmss - N/24]</li>
+<li>After N minutes: $[HHmmss + N/24/60]</li>
+<li>First N minutes: $[HHmmss - N/24/60]</li>
+</ul>
+</li>
+</ul>
+<h3><span id=CustomParameters>User-defined parameters</span></h3>
+<ul>
+<li>User-defined parameters are divided into global parameters and local
parameters. Global parameters are the global parameters passed when the process
definition and process instance are saved. Global parameters can be referenced
by local parameters of any task node in the whole process.</li>
+</ul>
+<p>For example:</p>
+<p align="center">
+ <img src="/img/user-defined-en.png" width="80%" />
+ </p>
+<ul>
+<li>global_bizdate is a global parameter, referring to system parameters.</li>
+</ul>
+<p align="center">
+ <img src="/img/user-defined1-en.png" width="80%" />
+ </p>
+<ul>
+<li>In tasks, local_param_bizdate refers to global parameters by
${global_bizdate} for scripts, the value of variable local_param_bizdate can be
referenced by ${local_param_bizdate}, or the value of local_param_bizdate can
be set directly by JDBC.</li>
+</ul>
+</div></section><footer class="footer-container"><div class="footer-body"><img
src="/img/ds_gray.svg"/><div class="cols-container"><div class="col
col-12"><h3>Disclaimer</h3><p>Apache DolphinScheduler (incubating) is an effort
undergoing incubation at The Apache Software Foundation (ASF), sponsored by
Incubator.
+Incubation is required of all newly accepted projects until a further review
indicates
+that the infrastructure, communications, and decision making process have
stabilized in a manner consistent with other successful ASF projects.
+While incubation status is not necessarily a reflection of the completeness or
stability of the code,
+it does indicate that the project has yet to be fully endorsed by the
ASF.</p></div><div class="col col-6"><dl><dt>Documentation</dt><dd><a
href="/en-us/docs/1.2.0/user_doc/architecture-design.html"
target="_self">Overview</a></dd><dd><a
href="/en-us/docs/1.2.0/user_doc/quick-start.html" target="_self">Quick
start</a></dd><dd><a href="/en-us/docs/1.2.0/user_doc/backend-development.html"
target="_self">Developer guide</a></dd></dl></div><div class="col
col-6"><dl><dt>ASF</dt><dd><a href=" [...]
+ <script
src="https://f.alicdn.com/react/15.4.1/react-with-addons.min.js"></script>
+ <script
src="https://f.alicdn.com/react/15.4.1/react-dom.min.js"></script>
+ <script>
+ window.rootPath = '';
+ </script>
+ <script src="/build/documentation.js"></script>
+</body>
+</html>
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/system-manual.json
b/en-us/docs/1.2.1/user_doc/system-manual.json
new file mode 100644
index 0000000..c30bee8
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/system-manual.json
@@ -0,0 +1,6 @@
+{
+ "filename": "system-manual.md",
+ "__html": "<h1>System Use Manual</h1>\n<h2>Operational
Guidelines</h2>\n<h3>Home page</h3>\n<p>The homepage contains task status
statistics, process status statistics, and workflow definition statistics for
all user projects.</p>\n<p align=\"center\">\n <img
src=\"/img/home_en.png\" width=\"80%\" />\n </p>\n<h3>Create a
project</h3>\n<ul>\n<li>Click "Project - > Create Project", enter
project name, description, and click "Submit" to create a new
project.</l [...]
+ "link": "/en-us/docs/1.2.1/user_doc/system-manual.html",
+ "meta": {}
+}
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/upgrade.html
b/en-us/docs/1.2.1/user_doc/upgrade.html
new file mode 100644
index 0000000..aa120d4
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/upgrade.html
@@ -0,0 +1,65 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0,
maximum-scale=1.0, user-scalable=no">
+ <meta name="keywords" content="upgrade" />
+ <meta name="description" content="upgrade" />
+ <!-- 网页标签标题 -->
+ <title>upgrade</title>
+ <link rel="shortcut icon" href="/img/docsite.ico"/>
+ <link rel="stylesheet" href="/build/documentation.css" />
+</head>
+<body>
+ <div id="root"><div class="documentation-page"
data-reactroot=""><header class="header-container header-container-normal"><div
class="header-body"><a href="/en-us/index.html"><img class="logo"
src="/img/hlogo_colorful.svg"/></a><div class="search search-normal"><span
class="icon-search"></span></div><span class="language-switch
language-switch-normal">中</span><div class="header-menu"><img
class="header-menu-toggle" src="/img/system/menu_gray.png"/><div><ul
class="ant-menu blackClass ant [...]
+<h2>1. Back up the previous version of the files and database</h2>
+<h2>2. Stop all services of dolphinscheduler</h2>
+<p><code>sh ./script/stop-all.sh</code></p>
+<h2>3. Download the new version of the installation package</h2>
+<ul>
+<li><a
href="https://dolphinscheduler.apache.org/en-us/docs/user_doc/download.html">download</a>,
download the latest version of the front and back installation packages
(backend referred to as dolphinscheduler-backend, front end referred to as
dolphinscheduler-front)</li>
+<li>The following upgrade operations need to be performed in the new version
of the directory</li>
+</ul>
+<h2>4. Database upgrade</h2>
+<ul>
+<li>Modify the following properties in conf/application-dao.properties</li>
+</ul>
+<pre><code> spring.datasource.url
+ spring.datasource.username
+ spring.datasource.password
+</code></pre>
+<ul>
+<li>Execute database upgrade script</li>
+</ul>
+<p><code>sh ./script/upgrade-dolphinscheduler.sh</code></p>
+<h2>5. Backend service upgrade</h2>
+<ul>
+<li>
+<p>Modify the content of the <a href="http://install.sh">install.sh</a>
configuration and execute the upgrade script</p>
+<p><code>sh install.sh</code></p>
+</li>
+</ul>
+<h2>6. Frontend service upgrade</h2>
+<ul>
+<li>
+<p>Overwrite the previous version of the dist directory</p>
+</li>
+<li>
+<p>Restart the nginx service</p>
+<p><code>systemctl restart nginx</code></p>
+</li>
+</ul>
+</div></section><footer class="footer-container"><div class="footer-body"><img
src="/img/ds_gray.svg"/><div class="cols-container"><div class="col
col-12"><h3>Disclaimer</h3><p>Apache DolphinScheduler (incubating) is an effort
undergoing incubation at The Apache Software Foundation (ASF), sponsored by
Incubator.
+Incubation is required of all newly accepted projects until a further review
indicates
+that the infrastructure, communications, and decision making process have
stabilized in a manner consistent with other successful ASF projects.
+While incubation status is not necessarily a reflection of the completeness or
stability of the code,
+it does indicate that the project has yet to be fully endorsed by the
ASF.</p></div><div class="col col-6"><dl><dt>Documentation</dt><dd><a
href="/en-us/docs/1.2.0/user_doc/architecture-design.html"
target="_self">Overview</a></dd><dd><a
href="/en-us/docs/1.2.0/user_doc/quick-start.html" target="_self">Quick
start</a></dd><dd><a href="/en-us/docs/1.2.0/user_doc/backend-development.html"
target="_self">Developer guide</a></dd></dl></div><div class="col
col-6"><dl><dt>ASF</dt><dd><a href=" [...]
+ <script
src="https://f.alicdn.com/react/15.4.1/react-with-addons.min.js"></script>
+ <script
src="https://f.alicdn.com/react/15.4.1/react-dom.min.js"></script>
+ <script>
+ window.rootPath = '';
+ </script>
+ <script src="/build/documentation.js"></script>
+</body>
+</html>
\ No newline at end of file
diff --git a/en-us/docs/1.2.1/user_doc/upgrade.json
b/en-us/docs/1.2.1/user_doc/upgrade.json
new file mode 100644
index 0000000..8f89628
--- /dev/null
+++ b/en-us/docs/1.2.1/user_doc/upgrade.json
@@ -0,0 +1,6 @@
+{
+ "filename": "upgrade.md",
+ "__html": "<h1>DolphinScheduler upgrade documentation</h1>\n<h2>1. Back up
the previous version of the files and database</h2>\n<h2>2. Stop all services
of dolphinscheduler</h2>\n<p><code>sh ./script/stop-all.sh</code></p>\n<h2>3.
Download the new version of the installation package</h2>\n<ul>\n<li><a
href=\"https://dolphinscheduler.apache.org/en-us/docs/user_doc/download.html\">download</a>,
download the latest version of the front and back installation packages
(backend referred to a [...]
+ "link": "/en-us/docs/1.2.1/user_doc/upgrade.html",
+ "meta": {}
+}
\ No newline at end of file