FALCON-1107 Move trusted extensions processing to server side. Ignore any documentation issues, as they will be addressed in https://issues.apache.org/jira/browse/FALCON-1106. Thanks!
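[Editor's note for reviewers: the runtime templates added in this patch mark substitutable fields with ##name## tokens (e.g. ##jobName##, ##jobFrequency##) that the server-side extension builder resolves against user-supplied extension properties. Below is a minimal, self-contained Java sketch of that substitution step; it is illustrative only and not the actual ExtensionProcessBuilderUtils code, and the class name is hypothetical.]

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Illustrative sketch: resolves ##name## markers the way a server-side
// extension builder might; not Falcon's actual implementation.
public class ExtensionTemplateSketch {
    private static final Pattern MARKER = Pattern.compile("##([A-Za-z0-9_]+)##");

    static String materialize(String template, Map<String, String> properties) {
        Matcher matcher = MARKER.matcher(template);
        StringBuffer resolved = new StringBuffer();
        while (matcher.find()) {
            String value = properties.get(matcher.group(1));
            if (value == null) {
                // A required extension property is missing; fail fast as the server would.
                throw new IllegalArgumentException("Missing property: " + matcher.group(1));
            }
            matcher.appendReplacement(resolved, Matcher.quoteReplacement(value));
        }
        matcher.appendTail(resolved);
        return resolved.toString();
    }

    public static void main(String[] args) {
        Map<String, String> props = new HashMap<>();
        props.put("jobName", "hdfs-monthly-sales-dr");
        props.put("jobFrequency", "months(1)");
        System.out.println(materialize(
                "<process name=\"##jobName##\"><frequency>##jobFrequency##</frequency></process>",
                props));
    }
}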
Author: Sowmya Ramesh <[email protected]> Reviewers: "Balu Vellanki <[email protected]>, Venkat Ranganathan <[email protected]>" Closes #92 from sowmyaramesh/FALCON-1107 Project: http://git-wip-us.apache.org/repos/asf/falcon/repo Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/95bf312f Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/95bf312f Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/95bf312f Branch: refs/heads/master Commit: 95bf312f46bc96bc247645da6500b495c21aede3 Parents: c52961c Author: Sowmya Ramesh <[email protected]> Authored: Tue Apr 12 16:05:48 2016 -0700 Committer: bvellanki <[email protected]> Committed: Tue Apr 12 16:05:48 2016 -0700 ---------------------------------------------------------------------- addons/extensions/hdfs-mirroring/README | 29 ++ addons/extensions/hdfs-mirroring/pom.xml | 32 ++ .../main/META/hdfs-mirroring-properties.json | 137 +++++++ .../runtime/hdfs-mirroring-template.xml | 45 +++ .../runtime/hdfs-mirroring-workflow.xml | 82 +++++ addons/extensions/hive-mirroring/README | 58 +++ addons/extensions/hive-mirroring/pom.xml | 32 ++ .../main/META/hive-mirroring-properties.json | 179 +++++++++ .../META/hive-mirroring-secure-properties.json | 191 ++++++++++ .../runtime/hive-mirroring-secure-template.xml | 45 +++ .../runtime/hive-mirroring-secure-workflow.xml | 363 +++++++++++++++++++ .../runtime/hive-mirroring-template.xml | 45 +++ .../runtime/hive-mirroring-workflow.xml | 255 +++++++++++++ .../java/org/apache/falcon/hive/HiveDRArgs.java | 9 +- .../org/apache/falcon/hive/HiveDROptions.java | 38 +- addons/recipes/hdfs-replication/README.txt | 29 -- addons/recipes/hdfs-replication/pom.xml | 32 -- .../resources/hdfs-replication-template.xml | 44 --- .../resources/hdfs-replication-workflow.xml | 82 ----- .../main/resources/hdfs-replication.properties | 79 ---- .../recipes/hive-disaster-recovery/README.txt | 58 --- addons/recipes/hive-disaster-recovery/pom.xml | 32 -- .../hive-disaster-recovery-secure-template.xml | 45 --- .../hive-disaster-recovery-secure-workflow.xml | 363 ------------------- .../hive-disaster-recovery-secure.properties | 110 ------ .../hive-disaster-recovery-template.xml | 45 --- .../hive-disaster-recovery-workflow.xml | 249 ------------- .../resources/hive-disaster-recovery.properties | 98 ----- .../falcon/catalog/AbstractCatalogService.java | 12 + .../falcon/catalog/HiveCatalogService.java | 16 + common/src/main/resources/startup.properties | 2 + extensions/pom.xml | 112 ++++++ .../falcon/extensions/AbstractExtension.java | 58 +++ .../org/apache/falcon/extensions/Extension.java | 102 ++++++ .../falcon/extensions/ExtensionBuilder.java | 32 ++ .../falcon/extensions/ExtensionFactory.java | 48 +++ .../falcon/extensions/ExtensionProperties.java | 89 +++++ .../falcon/extensions/ExtensionService.java | 49 +++ .../mirroring/hdfs/HdfsMirroringExtension.java | 111 ++++++ .../hdfs/HdfsMirroringExtensionProperties.java | 65 ++++ .../mirroring/hive/HiveMirroringExtension.java | 231 ++++++++++++ .../hive/HiveMirroringExtensionProperties.java | 92 +++++ .../falcon/extensions/store/ExtensionStore.java | 215 +++++++++++ .../util/ExtensionProcessBuilderUtils.java | 309 ++++++++++++++++ .../falcon/extensions/ExtensionServiceTest.java | 53 +++ .../apache/falcon/extensions/ExtensionTest.java | 160 ++++++++ .../store/AbstractTestExtensionStore.java | 103 ++++++ .../extensions/store/ExtensionStoreTest.java | 65 ++++ .../src/test/resources/backup-cluster-0.1.xml | 44 +++ .../test/resources/hdfs-mirroring-template.xml | 45 +++ 
.../test/resources/hive-mirroring-template.xml | 45 +++ .../src/test/resources/primary-cluster-0.1.xml | 44 +++ oozie/pom.xml | 6 + .../service/SharedLibraryHostingService.java | 91 ++++- pom.xml | 2 + src/main/assemblies/distributed-package.xml | 79 +++- src/main/assemblies/standalone-package.xml | 80 +++- 57 files changed, 3851 insertions(+), 1315 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/README ---------------------------------------------------------------------- diff --git a/addons/extensions/hdfs-mirroring/README b/addons/extensions/hdfs-mirroring/README new file mode 100644 index 0000000..78f1726 --- /dev/null +++ b/addons/extensions/hdfs-mirroring/README @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +HDFS Directory Replication Extension + +Overview +This extension replicates arbitrary directories on HDFS from one +Hadoop cluster to another Hadoop cluster. +It piggybacks on the replication solution in Falcon, which uses the DistCp tool. + +Use Case +* Copy directories without dated partitions between HDFS clusters +* Archive directories from HDFS to cloud storage, e.g. S3 or Azure WASB + +Limitations +As the data volume and the number of files grow, replication can become inefficient. http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/pom.xml ---------------------------------------------------------------------- diff --git a/addons/extensions/hdfs-mirroring/pom.xml b/addons/extensions/hdfs-mirroring/pom.xml new file mode 100644 index 0000000..cb9304e --- /dev/null +++ b/addons/extensions/hdfs-mirroring/pom.xml @@ -0,0 +1,32 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+ --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + + <modelVersion>4.0.0</modelVersion> + <groupId>org.apache.falcon.extensions</groupId> + <artifactId>falcon-hdfs-mirroring-extension</artifactId> + <version>0.10-SNAPSHOT</version> + <description>Apache Falcon sample Hdfs mirroring extension</description> + <name>Apache Falcon sample Hdfs mirroring extension</name> + <packaging>jar</packaging> +</project> http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/src/main/META/hdfs-mirroring-properties.json ---------------------------------------------------------------------- diff --git a/addons/extensions/hdfs-mirroring/src/main/META/hdfs-mirroring-properties.json b/addons/extensions/hdfs-mirroring/src/main/META/hdfs-mirroring-properties.json new file mode 100644 index 0000000..f1b4775 --- /dev/null +++ b/addons/extensions/hdfs-mirroring/src/main/META/hdfs-mirroring-properties.json @@ -0,0 +1,137 @@ +{ + "shortDescription": "This extension implements replicating arbitrary directories on HDFS from one Hadoop cluster to another Hadoop cluster. This piggy backs on replication solution in Falcon which uses the DistCp tool.", + "properties":[ + { + "propertyName":"jobName", + "required":true, + "description":"Unique job name", + "example":"hdfs-monthly-sales-dr" + }, + { + "propertyName":"jobClusterName", + "required":true, + "description":"Cluster where job should run", + "example":"backupCluster" + }, + { + "propertyName":"jobValidityStart", + "required":true, + "description":"Job validity start time", + "example":"2016-03-03T00:00Z" + }, + { + "propertyName":"jobValidityEnd", + "required":true, + "description":"Job validity end time", + "example":"2018-03-13T00:00Z" + }, + { + "propertyName":"jobFrequency", + "required":true, + "description":"job frequency. Valid frequency types are minutes, hours, days, months", + "example":"months(1)" + }, + { + "propertyName":"jobTimezone", + "required":false, + "description":"Time zone for the job", + "example":"GMT" + }, + { + "propertyName":"jobTags", + "required":false, + "description":"list of comma separated tags. 
Key Value Pairs, separated by comma", + "example":"[email protected], [email protected], _department_type=forecasting" + }, + { + "propertyName":"jobRetryPolicy", + "required":false, + "description":"Job retry policy", + "example":"periodic" + }, + { + "propertyName":"jobRetryDelay", + "required":false, + "description":"Job retry delay", + "example":"minutes(30)" + }, + { + "propertyName":"jobRetryAttempts", + "required":false, + "description":"Job retry attempts", + "example":"3" + }, + { + "propertyName":"jobRetryOnTimeout", + "required":false, + "description":"Job retry on timeout", + "example":"true" + }, + { + "propertyName":"jobAclOwner", + "required":false, + "description":"ACL owner", + "example":"ambari-qa" + }, + { + "propertyName":"jobAclGroup", + "required":false, + "description":"ACL group", + "example":"users" + }, + { + "propertyName":"jobAclPermission", + "required":false, + "description":"ACL permission", + "example":"0x755" + }, + { + "propertyName":"sourceDir", + "required":true, + "description":"Multiple hdfs comma separated source directories", + "example":"/user/ambari-qa/primaryCluster/dr/input1, /user/ambari-qa/primaryCluster/dr/input2" + }, + { + "propertyName":"sourceCluster", + "required":true, + "description":"Source cluster for hdfs mirroring", + "example":"primaryCluster" + }, + { + "propertyName":"targetDir", + "required":true, + "description":"Target hdfs directory", + "example":"/user/ambari-qa/backupCluster/dr" + }, + { + "propertyName":"targetCluster", + "required":true, + "description":"Target cluster for hdfs mirroring", + "example":"backupCluster" + }, + { + "propertyName":"distcpMaxMaps", + "required":false, + "description":"Maximum number of mappers for DistCP", + "example":"1" + }, + { + "propertyName":"distcpMapBandwidth", + "required":false, + "description":"Bandwidth in MB for each mapper in DistCP", + "example":"100" + }, + { + "propertyName":"jobNotificationType", + "required":false, + "description":"Email Notification for Falcon instance completion", + "example":"email" + }, + { + "propertyName":"jobNotificationReceivers", + "required":false, + "description":"Comma separated email Id's", + "example":"[email protected], [email protected]" + } + ] +} http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-template.xml ---------------------------------------------------------------------- diff --git a/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-template.xml b/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-template.xml new file mode 100644 index 0000000..d511d00 --- /dev/null +++ b/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-template.xml @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + --> + +<process name="##jobName##" xmlns="uri:falcon:process:0.1"> + <clusters> + <!-- source --> + <cluster name="##jobClusterName##"> + <validity end="##jobValidityEnd##" start="##jobValidityStart##"/> + </cluster> + </clusters> + + <tags/> + + <parallel>1</parallel> + <!-- Dir replication needs to run only once to catch up --> + <order>LAST_ONLY</order> + <frequency>##jobFrequency##</frequency> + <timezone>##jobTimezone##</timezone> + + <properties> + <property name="oozie.wf.subworkflow.classpath.inheritance" value="true"/> + </properties> + + <workflow name="##jobWorkflowName##" engine="##jobWorkflowEngine##" + path="##jobWorkflowPath##" lib="##jobWorkflowLibPath##"/> + <retry policy="##jobRetryPolicy##" delay="##jobRetryDelay##" attempts="3"/> + <notification type="##jobNotificationType##" to="##jobNotificationReceivers##"/> + <ACL/> +</process> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-workflow.xml ---------------------------------------------------------------------- diff --git a/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-workflow.xml b/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-workflow.xml new file mode 100644 index 0000000..1e2282c --- /dev/null +++ b/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-workflow.xml @@ -0,0 +1,82 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ --> +<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-dr-fs-workflow'> + <start to='dr-replication'/> + <!-- Replication action --> + <action name="dr-replication"> + <java> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <configuration> + <property> <!-- hadoop 2 parameter --> + <name>oozie.launcher.mapreduce.job.user.classpath.first</name> + <value>true</value> + </property> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>oozie.launcher.mapred.job.priority</name> + <value>${jobPriority}</value> + </property> + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.java</name> + <value>distcp</value> + </property> + <property> + <name>oozie.launcher.oozie.libpath</name> + <value>${wf:conf("falcon.libpath")}</value> + </property> + <property> + <name>oozie.launcher.mapreduce.job.hdfs-servers</name> + <value>${sourceClusterFS},${targetClusterFS}</value> + </property> + </configuration> + <main-class>org.apache.falcon.replication.FeedReplicator</main-class> + <arg>-Dmapred.job.queue.name=${queueName}</arg> + <arg>-Dmapred.job.priority=${jobPriority}</arg> + <arg>-maxMaps</arg> + <arg>${distcpMaxMaps}</arg> + <arg>-mapBandwidth</arg> + <arg>${distcpMapBandwidth}</arg> + <arg>-sourcePaths</arg> + <arg>${sourceDir}</arg> + <arg>-targetPath</arg> + <arg>${targetClusterFS}${targetDir}</arg> + <arg>-falconFeedStorageType</arg> + <arg>FILESYSTEM</arg> + <arg>-availabilityFlag</arg> + <arg>${availabilityFlag == 'NA' ? "NA" : availabilityFlag}</arg> + <arg>-counterLogDir</arg> + <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}</arg> + </java> + <ok to="end"/> + <error to="fail"/> + </action> + <kill name="fail"> + <message> + Workflow action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + </message> + </kill> + <end name="end"/> +</workflow-app> http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/README ---------------------------------------------------------------------- diff --git a/addons/extensions/hive-mirroring/README b/addons/extensions/hive-mirroring/README new file mode 100644 index 0000000..827f7e5 --- /dev/null +++ b/addons/extensions/hive-mirroring/README @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Hive Metastore Disaster Recovery Extension + +Overview +This extension replicates Hive metadata and data from one +Hadoop cluster to another Hadoop cluster. +It piggybacks on the replication solution in Falcon, which uses the DistCp tool. + +Use Case +* Replicate Hive databases or tables from a primary cluster to a backup cluster for disaster recovery +* Keep Hive metadata and data on a backup cluster in sync with the primary cluster + +Limitations +* http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/pom.xml ---------------------------------------------------------------------- diff --git a/addons/extensions/hive-mirroring/pom.xml b/addons/extensions/hive-mirroring/pom.xml new file mode 100644 index 0000000..adfb0be --- /dev/null +++ b/addons/extensions/hive-mirroring/pom.xml @@ -0,0 +1,32 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+ --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + + <modelVersion>4.0.0</modelVersion> + <groupId>org.apache.falcon.extensions</groupId> + <artifactId>falcon-hive-mirroring-extension</artifactId> + <version>0.10-SNAPSHOT</version> + <description>Apache Falcon sample Hive mirroring extension</description> + <name>Apache Falcon sample Hive mirroring extension</name> + <packaging>jar</packaging> +</project> http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-properties.json ---------------------------------------------------------------------- diff --git a/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-properties.json b/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-properties.json new file mode 100644 index 0000000..a9f3d1b --- /dev/null +++ b/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-properties.json @@ -0,0 +1,179 @@ +{ + "shortDescription":"This extension implements replicating hive metadata and data from one Hadoop cluster to another Hadoop cluster.", + "properties":[ + { + "propertyName":"jobName", + "required":true, + "description":"Unique job name", + "example":"hive-monthly-sales-dr" + }, + { + "propertyName":"jobClusterName", + "required":true, + "description":"Cluster where job should run", + "example":"backupCluster" + }, + { + "propertyName":"jobValidityStart", + "required":true, + "description":"Job validity start time", + "example":"2016-03-03T00:00Z" + }, + { + "propertyName":"jobValidityEnd", + "required":true, + "description":"Job validity end time", + "example":"2018-03-13T00:00Z" + }, + { + "propertyName":"jobFrequency", + "required":true, + "description":"job frequency. Valid frequency types are minutes, hours, days, months", + "example":"months(1)" + }, + { + "propertyName":"jobTimezone", + "required":false, + "description":"Time zone for the job", + "example":"GMT" + }, + { + "propertyName":"jobTags", + "required":false, + "description":"list of comma separated tags. 
Key Value Pairs, separated by comma", + "example":"[email protected], [email protected], _department_type=forecasting" + }, + { + "propertyName":"jobRetryPolicy", + "required":false, + "description":"Job retry policy", + "example":"periodic" + }, + { + "propertyName":"jobRetryDelay", + "required":false, + "description":"Job retry delay", + "example":"minutes(30)" + }, + { + "propertyName":"jobRetryAttempts", + "required":false, + "description":"Job retry attempts", + "example":"3" + }, + { + "propertyName":"jobRetryOnTimeout", + "required":false, + "description":"Job retry on timeout", + "example":true + }, + { + "propertyName":"jobAclOwner", + "required":false, + "description":"ACL owner", + "example":"ambari-qa" + }, + { + "propertyName":"jobAclGroup", + "required":false, + "description":"ACL group", + "example":"users" + }, + { + "propertyName":"jobAclPermission", + "required":false, + "description":"ACL permission", + "example":"0x755" + }, + { + "propertyName":"sourceCluster", + "required":true, + "description":"Source cluster for hive mirroring", + "example":"primaryCluster" + }, + { + "propertyName":"sourceHiveServer2Uri", + "required":true, + "description":"Hive2 server end point", + "example":"hive2://localhost:10000" + }, + { + "propertyName":"sourceDatabases", + "required":true, + "description":"For DB level replication specify multiple comma separated databases to replicate", + "example":"salesDb" + }, + { + "propertyName":"sourceTables", + "required":false, + "description":"For table level replication specify multiple comma separated tables to replicate", + "example":"monthly_sales1, monthly_sales2" + }, + { + "propertyName":"sourceStagingPath", + "required":false, + "description":"Staging path on source", + "example":"/apps/hive/dr" + }, + { + "propertyName":"targetCluster", + "required":true, + "description":"target cluster for hive mirroring", + "example":"backupCluster" + }, + { + "propertyName":"targetHiveServer2Uri", + "required":true, + "description":"Hive2 server end point", + "example":"hive2://localhost:10000" + }, + { + "propertyName":"targetStagingPath", + "required":false, + "description":"Staging path on target", + "example":"/apps/hive/dr" + }, + { + "propertyName":"maxEvents", + "required":false, + "description":"To ceil the max events processed each time the job runs. Set it to max value depending on your bandwidth limit. Setting it to -1 will process all the events but can hog up the bandwidth. Use it judiciously!", + "example":"10000" + }, + { + "propertyName":"replicationMaxMaps", + "required":false, + "description":"Maximum number of mappers to use for hive replication", + "example":"1" + }, + { + "propertyName":"distcpMaxMaps", + "required":false, + "description":"Maximum number of mappers for DistCP", + "example":"1" + }, + { + "propertyName":"distcpMapBandwidth", + "required":false, + "description":"Bandwidth in MB for each mapper in DistCP", + "example":"100" + }, + { + "propertyName":"tdeEncryptionEnabled", + "required":false, + "description":"Set this flag to true if TDE encryption is enabled on source and target. 
Default value is false", + "example":"true" + }, + { + "propertyName":"jobNotificationType", + "required":false, + "description":"Email Notification for Falcon instance completion", + "example":"email" + }, + { + "propertyName":"jobNotificationReceivers", + "required":false, + "description":"Comma separated email Id's", + "example":"[email protected], [email protected]" + } + ] +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-secure-properties.json ---------------------------------------------------------------------- diff --git a/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-secure-properties.json b/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-secure-properties.json new file mode 100644 index 0000000..8ec03b5 --- /dev/null +++ b/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-secure-properties.json @@ -0,0 +1,191 @@ +{ + "shortDescription": "This extension implements replicating hive metadata and data from one Hadoop cluster to another Hadoop cluster in secure environment.", + "properties":[ + { + "propertyName":"jobName", + "required":true, + "description":"Unique job name", + "example":"hive-monthly-sales-dr" + }, + { + "propertyName":"jobClusterName", + "required":true, + "description":"Cluster where job should run", + "example":"backupCluster" + }, + { + "propertyName":"jobValidityStart", + "required":true, + "description":"Job validity start time", + "example":"2016-03-03T00:00Z" + }, + { + "propertyName":"jobValidityEnd", + "required":true, + "description":"Job validity end time", + "example":"2018-03-13T00:00Z" + }, + { + "propertyName":"jobFrequency", + "required":true, + "description":"job frequency. Valid frequency types are minutes, hours, days, months", + "example":"months(1)" + }, + { + "propertyName":"jobTimezone", + "required":false, + "description":"Time zone for the job", + "example":"GMT" + }, + { + "propertyName":"jobTags", + "required":false, + "description":"list of comma separated tags. 
Key Value Pairs, separated by comma", + "example":"[email protected], [email protected], _department_type=forecasting" + }, + { + "propertyName":"jobRetryPolicy", + "required":false, + "description":"Job retry policy", + "example":"periodic" + }, + { + "propertyName":"jobRetryDelay", + "required":false, + "description":"Job retry delay", + "example":"minutes(30)" + }, + { + "propertyName":"jobRetryAttempts", + "required":false, + "description":"Job retry attempts", + "example":"3" + }, + { + "propertyName":"jobRetryOnTimeout", + "required":false, + "description":"Job retry on timeout", + "example":true + }, + { + "propertyName":"jobAclOwner", + "required":false, + "description":"ACL owner", + "example":"ambari-qa" + }, + { + "propertyName":"jobAclGroup", + "required":false, + "description":"ACL group", + "example":"users" + }, + { + "propertyName":"jobAclPermission", + "required":false, + "description":"ACL permission", + "example":"0x755" + }, + { + "propertyName":"sourceCluster", + "required":true, + "description":"Source cluster for hive mirroring", + "example":"primaryCluster" + }, + { + "propertyName":"sourceHiveServer2Uri", + "required":true, + "description":"Hive2 server end point", + "example":"hive2://localhost:10000" + }, + { + "propertyName":"sourceDatabases", + "required":true, + "description":"For DB level replication specify multiple comma separated databases to replicate", + "example":"salesDb" + }, + { + "propertyName":"sourceTables", + "required":false, + "description":"For table level replication specify multiple comma separated tables to replicate", + "example":"monthly_sales1, monthly_sales2" + }, + { + "propertyName":"sourceStagingPath", + "required":false, + "description":"Staging path on source", + "example":"/apps/hive/dr" + }, + { + "propertyName":"sourceHive2KerberosPrincipal", + "required":true, + "description":"Required on secure clusters. Kerberos principal required to access hive servers ", + "example":"hive/[email protected]" + }, + { + "propertyName":"targetCluster", + "required":true, + "description":"target cluster for hive mirroring", + "example":"backupCluster" + }, + { + "propertyName":"targetHiveServer2Uri", + "required":true, + "description":"Hive2 server end point", + "example":"hive2://localhost:10000" + }, + { + "propertyName":"targetStagingPath", + "required":false, + "description":"Staging path on target", + "example":"/apps/hive/dr" + }, + { + "propertyName":"targetHive2KerberosPrincipal", + "required":true, + "description":"Required on secure clusters. Kerberos principal required to access hive servers ", + "example":"hive/[email protected]" + }, + { + "propertyName":"maxEvents", + "required":false, + "description":"To ceil the max events processed each time the job runs. Set it to max value depending on your bandwidth limit. Setting it to -1 will process all the events but can hog up the bandwidth. Use it judiciously!", + "example":"10000" + }, + { + "propertyName":"replicationMaxMaps", + "required":false, + "description":"Maximum number of mappers to use for hive replication", + "example":"1" + }, + { + "propertyName":"distcpMaxMaps", + "required":false, + "description":"Maximum number of mappers for DistCP", + "example":"1" + }, + { + "propertyName":"distcpMapBandwidth", + "required":false, + "description":"Bandwidth in MB for each mapper in DistCP", + "example":"100" + }, + { + "propertyName":"tdeEncryptionEnabled", + "required":false, + "description":"Set this flag to true if TDE encryption is enabled on source and target. 
Default value is false", + "example":"true" + }, + { + "propertyName":"jobNotificationType", + "required":false, + "description":"Email Notification for Falcon instance completion", + "example":"email" + }, + { + "propertyName":"jobNotificationReceivers", + "required":false, + "description":"Comma separated email Id's", + "example":"[email protected], [email protected]" + } + ] +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-template.xml ---------------------------------------------------------------------- diff --git a/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-template.xml b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-template.xml new file mode 100644 index 0000000..4497bb4 --- /dev/null +++ b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-template.xml @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + +<process name="##jobName##" xmlns="uri:falcon:process:0.1"> + <clusters> + <!-- source --> + <cluster name="##jobClusterName##"> + <validity end="##jobValidityEnd##" start="##jobValidityStart##"/> + </cluster> + </clusters> + + <tags/> + + <parallel>1</parallel> + <!-- Replication needs to run only once to catch up --> + <order>LAST_ONLY</order> + <frequency>##jobFrequency##</frequency> + <timezone>##jobTimezone##</timezone> + + <properties> + <property name="oozie.wf.subworkflow.classpath.inheritance" value="true"/> + </properties> + + <workflow name="##jobWorkflowName##" engine="##jobWorkflowEngine##" + path="##jobWorkflowPath##" lib="##jobWorkflowLibPath##"/> + <retry policy="##jobRetryPolicy##" delay="##jobRetryDelay##" attempts="3"/> + <notification type="##jobNotificationType##" to="##jobNotificationReceivers##"/> + <ACL/> +</process> http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-workflow.xml ---------------------------------------------------------------------- diff --git a/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-workflow.xml b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-workflow.xml new file mode 100644 index 0000000..4bf048f --- /dev/null +++ b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-workflow.xml @@ -0,0 +1,363 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> +<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-dr-hive-workflow'> + <credentials> + <credential name='hive_src_credentials' type='hcat'> + <property> + <name>hcat.metastore.uri</name> + <value>${sourceMetastoreUri}</value> + </property> + <property> + <name>hcat.metastore.principal</name> + <value>${sourceHiveMetastoreKerberosPrincipal}</value> + </property> + </credential> + <credential name='hive_tgt_credentials' type='hcat'> + <property> + <name>hcat.metastore.uri</name> + <value>${targetMetastoreUri}</value> + </property> + <property> + <name>hcat.metastore.principal</name> + <value>${targetHiveMetastoreKerberosPrincipal}</value> + </property> + </credential> + <credential name="hive2_src_credentials" type="hive2"> + <property> + <name>hive2.server.principal</name> + <value>${sourceHive2KerberosPrincipal}</value> + </property> + <property> + <name>hive2.jdbc.url</name> + <value>jdbc:${sourceHiveServer2Uri}/${sourceDatabase}</value> + </property> + </credential> + <credential name="hive2_tgt_credentials" type="hive2"> + <property> + <name>hive2.server.principal</name> + <value>${targetHive2KerberosPrincipal}</value> + </property> + <property> + <name>hive2.jdbc.url</name> + <value>jdbc:${targetHiveServer2Uri}/${sourceDatabase}</value> + </property> + </credential> + </credentials> + <start to='last-event'/> + <action name="last-event" cred="hive_tgt_credentials"> + <java> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <configuration> + <property> <!-- hadoop 2 parameter --> + <name>oozie.launcher.mapreduce.job.user.classpath.first</name> + <value>true</value> + </property> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>oozie.launcher.mapred.job.priority</name> + <value>${jobPriority}</value> + </property> + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.java</name> + <value>distcp,hive,hive2,hcatalog</value> + </property> + <property> + <name>oozie.launcher.mapreduce.job.hdfs-servers</name> + <value>${sourceNN},${targetNN}</value> + </property> + <property> + <name>mapreduce.job.hdfs-servers</name> + <value>${sourceNN},${targetNN}</value> + </property> + </configuration> + <main-class>org.apache.falcon.hive.HiveDRTool</main-class> + <arg>-Dmapred.job.queue.name=${queueName}</arg> + <arg>-Dmapred.job.priority=${jobPriority}</arg> + <arg>-falconLibPath</arg> + <arg>${wf:conf("falcon.libpath")}</arg> + <arg>-sourceCluster</arg> + <arg>${sourceCluster}</arg> + <arg>-sourceMetastoreUri</arg> + <arg>${sourceMetastoreUri}</arg> + <arg>-sourceHiveServer2Uri</arg> + <arg>${sourceHiveServer2Uri}</arg> + <arg>-sourceDatabase</arg> + <arg>${sourceDatabase}</arg> + <arg>-sourceTable</arg> + <arg>${sourceTable}</arg> + <arg>-sourceStagingPath</arg> + <arg>${sourceStagingPath}</arg> + <arg>-sourceNN</arg> + <arg>${sourceNN}</arg> + 
<arg>-sourceNNKerberosPrincipal</arg> + <arg>${sourceNNKerberosPrincipal}</arg> + <arg>-sourceHiveMetastoreKerberosPrincipal</arg> + <arg>${sourceHiveMetastoreKerberosPrincipal}</arg> + <arg>-sourceHive2KerberosPrincipal</arg> + <arg>${sourceHive2KerberosPrincipal}</arg> + <arg>-targetCluster</arg> + <arg>${targetCluster}</arg> + <arg>-targetMetastoreUri</arg> + <arg>${targetMetastoreUri}</arg> + <arg>-targetHiveServer2Uri</arg> + <arg>${targetHiveServer2Uri}</arg> + <arg>-targetStagingPath</arg> + <arg>${targetStagingPath}</arg> + <arg>-targetNN</arg> + <arg>${targetNN}</arg> + <arg>-targetNNKerberosPrincipal</arg> + <arg>${targetNNKerberosPrincipal}</arg> + <arg>-targetHiveMetastoreKerberosPrincipal</arg> + <arg>${targetHiveMetastoreKerberosPrincipal}</arg> + <arg>-targetHive2KerberosPrincipal</arg> + <arg>${targetHive2KerberosPrincipal}</arg> + <arg>-maxEvents</arg> + <arg>${maxEvents}</arg> + <arg>-clusterForJobRun</arg> + <arg>${clusterForJobRun}</arg> + <arg>-clusterForJobRunWriteEP</arg> + <arg>${clusterForJobRunWriteEP}</arg> + <arg>-clusterForJobNNKerberosPrincipal</arg> + <arg>${clusterForJobNNKerberosPrincipal}</arg> + <arg>-tdeEncryptionEnabled</arg> + <arg>${tdeEncryptionEnabled}</arg> + <arg>-jobName</arg> + <arg>${jobName}-${nominalTime}</arg> + <arg>-executionStage</arg> + <arg>lastevents</arg> + </java> + <ok to="export-dr-replication"/> + <error to="fail"/> + </action> + <!-- Export Replication action --> + <action name="export-dr-replication" cred="hive_src_credentials,hive2_src_credentials"> + <java> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <configuration> + <property> <!-- hadoop 2 parameter --> + <name>oozie.launcher.mapreduce.job.user.classpath.first</name> + <value>true</value> + </property> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>oozie.launcher.mapred.job.priority</name> + <value>${jobPriority}</value> + </property> + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.java</name> + <value>distcp,hive,hive2,hcatalog</value> + </property> + <property> + <name>oozie.launcher.mapreduce.job.hdfs-servers</name> + <value>${sourceNN},${targetNN}</value> + </property> + <property> + <name>mapreduce.job.hdfs-servers</name> + <value>${sourceNN},${targetNN}</value> + </property> + </configuration> + <main-class>org.apache.falcon.hive.HiveDRTool</main-class> + <arg>-Dmapred.job.queue.name=${queueName}</arg> + <arg>-Dmapred.job.priority=${jobPriority}</arg> + <arg>-falconLibPath</arg> + <arg>${wf:conf("falcon.libpath")}</arg> + <arg>-replicationMaxMaps</arg> + <arg>${replicationMaxMaps}</arg> + <arg>-distcpMaxMaps</arg> + <arg>${distcpMaxMaps}</arg> + <arg>-sourceCluster</arg> + <arg>${sourceCluster}</arg> + <arg>-sourceMetastoreUri</arg> + <arg>${sourceMetastoreUri}</arg> + <arg>-sourceHiveServer2Uri</arg> + <arg>${sourceHiveServer2Uri}</arg> + <arg>-sourceDatabase</arg> + <arg>${sourceDatabase}</arg> + <arg>-sourceTable</arg> + <arg>${sourceTable}</arg> + <arg>-sourceStagingPath</arg> + <arg>${sourceStagingPath}</arg> + <arg>-sourceNN</arg> + <arg>${sourceNN}</arg> + <arg>-sourceNNKerberosPrincipal</arg> + <arg>${sourceNNKerberosPrincipal}</arg> + <arg>-sourceHiveMetastoreKerberosPrincipal</arg> + <arg>${sourceHiveMetastoreKerberosPrincipal}</arg> + <arg>-sourceHive2KerberosPrincipal</arg> + <arg>${sourceHive2KerberosPrincipal}</arg> + <arg>-targetCluster</arg> + 
<arg>${targetCluster}</arg> + <arg>-targetMetastoreUri</arg> + <arg>${targetMetastoreUri}</arg> + <arg>-targetHiveServer2Uri</arg> + <arg>${targetHiveServer2Uri}</arg> + <arg>-targetStagingPath</arg> + <arg>${targetStagingPath}</arg> + <arg>-targetNN</arg> + <arg>${targetNN}</arg> + <arg>-targetNNKerberosPrincipal</arg> + <arg>${targetNNKerberosPrincipal}</arg> + <arg>-targetHiveMetastoreKerberosPrincipal</arg> + <arg>${targetHiveMetastoreKerberosPrincipal}</arg> + <arg>-targetHive2KerberosPrincipal</arg> + <arg>${targetHive2KerberosPrincipal}</arg> + <arg>-maxEvents</arg> + <arg>${maxEvents}</arg> + <arg>-distcpMapBandwidth</arg> + <arg>${distcpMapBandwidth}</arg> + <arg>-clusterForJobRun</arg> + <arg>${clusterForJobRun}</arg> + <arg>-clusterForJobRunWriteEP</arg> + <arg>${clusterForJobRunWriteEP}</arg> + <arg>-clusterForJobNNKerberosPrincipal</arg> + <arg>${clusterForJobNNKerberosPrincipal}</arg> + <arg>-tdeEncryptionEnabled</arg> + <arg>${tdeEncryptionEnabled}</arg> + <arg>-jobName</arg> + <arg>${jobName}-${nominalTime}</arg> + <arg>-executionStage</arg> + <arg>export</arg> + <arg>-counterLogDir</arg> + <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}/</arg> + </java> + <ok to="import-dr-replication"/> + <error to="fail"/> + </action> + <!-- Import Replication action --> + <action name="import-dr-replication" cred="hive_tgt_credentials,hive2_tgt_credentials"> + <java> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <configuration> + <property> <!-- hadoop 2 parameter --> + <name>oozie.launcher.mapreduce.job.user.classpath.first</name> + <value>true</value> + </property> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>oozie.launcher.mapred.job.priority</name> + <value>${jobPriority}</value> + </property> + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.java</name> + <value>distcp,hive,hive2,hcatalog</value> + </property> + <property> + <name>oozie.launcher.mapreduce.job.hdfs-servers</name> + <value>${sourceNN},${targetNN}</value> + </property> + <property> + <name>mapreduce.job.hdfs-servers</name> + <value>${sourceNN},${targetNN}</value> + </property> + </configuration> + <main-class>org.apache.falcon.hive.HiveDRTool</main-class> + <arg>-Dmapred.job.queue.name=${queueName}</arg> + <arg>-Dmapred.job.priority=${jobPriority}</arg> + <arg>-falconLibPath</arg> + <arg>${wf:conf("falcon.libpath")}</arg> + <arg>-replicationMaxMaps</arg> + <arg>${replicationMaxMaps}</arg> + <arg>-distcpMaxMaps</arg> + <arg>${distcpMaxMaps}</arg> + <arg>-sourceCluster</arg> + <arg>${sourceCluster}</arg> + <arg>-sourceMetastoreUri</arg> + <arg>${sourceMetastoreUri}</arg> + <arg>-sourceHiveServer2Uri</arg> + <arg>${sourceHiveServer2Uri}</arg> + <arg>-sourceDatabase</arg> + <arg>${sourceDatabase}</arg> + <arg>-sourceTable</arg> + <arg>${sourceTable}</arg> + <arg>-sourceStagingPath</arg> + <arg>${sourceStagingPath}</arg> + <arg>-sourceNN</arg> + <arg>${sourceNN}</arg> + <arg>-sourceNNKerberosPrincipal</arg> + <arg>${sourceNNKerberosPrincipal}</arg> + <arg>-sourceHiveMetastoreKerberosPrincipal</arg> + <arg>${sourceHiveMetastoreKerberosPrincipal}</arg> + <arg>-sourceHive2KerberosPrincipal</arg> + <arg>${sourceHive2KerberosPrincipal}</arg> + <arg>-targetCluster</arg> + <arg>${targetCluster}</arg> + <arg>-targetMetastoreUri</arg> + <arg>${targetMetastoreUri}</arg> + <arg>-targetHiveServer2Uri</arg> + 
<arg>${targetHiveServer2Uri}</arg> + <arg>-targetStagingPath</arg> + <arg>${targetStagingPath}</arg> + <arg>-targetNN</arg> + <arg>${targetNN}</arg> + <arg>-targetNNKerberosPrincipal</arg> + <arg>${targetNNKerberosPrincipal}</arg> + <arg>-targetHiveMetastoreKerberosPrincipal</arg> + <arg>${targetHiveMetastoreKerberosPrincipal}</arg> + <arg>-targetHive2KerberosPrincipal</arg> + <arg>${targetHive2KerberosPrincipal}</arg> + <arg>-maxEvents</arg> + <arg>${maxEvents}</arg> + <arg>-distcpMapBandwidth</arg> + <arg>${distcpMapBandwidth}</arg> + <arg>-clusterForJobRun</arg> + <arg>${clusterForJobRun}</arg> + <arg>-clusterForJobRunWriteEP</arg> + <arg>${clusterForJobRunWriteEP}</arg> + <arg>-clusterForJobNNKerberosPrincipal</arg> + <arg>${clusterForJobNNKerberosPrincipal}</arg> + <arg>-tdeEncryptionEnabled</arg> + <arg>${tdeEncryptionEnabled}</arg> + <arg>-jobName</arg> + <arg>${jobName}-${nominalTime}</arg> + <arg>-executionStage</arg> + <arg>import</arg> + </java> + <ok to="end"/> + <error to="fail"/> + </action> + <kill name="fail"> + <message> + Workflow action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + </message> + </kill> + <end name="end"/> +</workflow-app> http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-template.xml ---------------------------------------------------------------------- diff --git a/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-template.xml b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-template.xml new file mode 100644 index 0000000..4497bb4 --- /dev/null +++ b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-template.xml @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ --> + +<process name="##jobName##" xmlns="uri:falcon:process:0.1"> + <clusters> + <!-- source --> + <cluster name="##jobClusterName##"> + <validity end="##jobValidityEnd##" start="##jobValidityStart##"/> + </cluster> + </clusters> + + <tags/> + + <parallel>1</parallel> + <!-- Replication needs to run only once to catch up --> + <order>LAST_ONLY</order> + <frequency>##jobFrequency##</frequency> + <timezone>##jobTimezone##</timezone> + + <properties> + <property name="oozie.wf.subworkflow.classpath.inheritance" value="true"/> + </properties> + + <workflow name="##jobWorkflowName##" engine="##jobWorkflowEngine##" + path="##jobWorkflowPath##" lib="##jobWorkflowLibPath##"/> + <retry policy="##jobRetryPolicy##" delay="##jobRetryDelay##" attempts="3"/> + <notification type="##jobNotificationType##" to="##jobNotificationReceivers##"/> + <ACL/> +</process> http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-workflow.xml ---------------------------------------------------------------------- diff --git a/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-workflow.xml b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-workflow.xml new file mode 100644 index 0000000..9f9bf92 --- /dev/null +++ b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-workflow.xml @@ -0,0 +1,255 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ --> +<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-dr-hive-workflow'> + <start to='last-event'/> + <action name="last-event"> + <java> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <configuration> + <property> <!-- hadoop 2 parameter --> + <name>oozie.launcher.mapreduce.job.user.classpath.first</name> + <value>true</value> + </property> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>oozie.launcher.mapred.job.priority</name> + <value>${jobPriority}</value> + </property> + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.java</name> + <value>distcp,hive,hive2,hcatalog</value> + </property> + </configuration> + <main-class>org.apache.falcon.hive.HiveDRTool</main-class> + <arg>-Dmapred.job.queue.name=${queueName}</arg> + <arg>-Dmapred.job.priority=${jobPriority}</arg> + <arg>-falconLibPath</arg> + <arg>${wf:conf("falcon.libpath")}</arg> + <arg>-sourceCluster</arg> + <arg>${sourceCluster}</arg> + <arg>-sourceMetastoreUri</arg> + <arg>${sourceMetastoreUri}</arg> + <arg>-sourceHiveServer2Uri</arg> + <arg>${sourceHiveServer2Uri}</arg> + <arg>-sourceDatabase</arg> + <arg>${sourceDatabase}</arg> + <arg>-sourceTable</arg> + <arg>${sourceTable}</arg> + <arg>-sourceStagingPath</arg> + <arg>${sourceStagingPath}</arg> + <arg>-sourceNN</arg> + <arg>${sourceNN}</arg> + <arg>-targetCluster</arg> + <arg>${targetCluster}</arg> + <arg>-targetMetastoreUri</arg> + <arg>${targetMetastoreUri}</arg> + <arg>-targetHiveServer2Uri</arg> + <arg>${targetHiveServer2Uri}</arg> + <arg>-targetStagingPath</arg> + <arg>${targetStagingPath}</arg> + <arg>-targetNN</arg> + <arg>${targetNN}</arg> + <arg>-maxEvents</arg> + <arg>${maxEvents}</arg> + <arg>-clusterForJobRun</arg> + <arg>${clusterForJobRun}</arg> + <arg>-clusterForJobRunWriteEP</arg> + <arg>${clusterForJobRunWriteEP}</arg> + <arg>-tdeEncryptionEnabled</arg> + <arg>${tdeEncryptionEnabled}</arg> + <arg>-jobName</arg> + <arg>${jobName}-${nominalTime}</arg> + <arg>-executionStage</arg> + <arg>lastevents</arg> + </java> + <ok to="export-dr-replication"/> + <error to="fail"/> + </action> + <!-- Export Replication action --> + <action name="export-dr-replication"> + <java> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <configuration> + <property> <!-- hadoop 2 parameter --> + <name>oozie.launcher.mapreduce.job.user.classpath.first</name> + <value>true</value> + </property> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>oozie.launcher.mapred.job.priority</name> + <value>${jobPriority}</value> + </property> + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.java</name> + <value>distcp,hive,hive2,hcatalog</value> + </property> + </configuration> + <main-class>org.apache.falcon.hive.HiveDRTool</main-class> + <arg>-Dmapred.job.queue.name=${queueName}</arg> + <arg>-Dmapred.job.priority=${jobPriority}</arg> + <arg>-falconLibPath</arg> + <arg>${wf:conf("falcon.libpath")}</arg> + <arg>-replicationMaxMaps</arg> + <arg>${replicationMaxMaps}</arg> + <arg>-distcpMaxMaps</arg> + <arg>${distcpMaxMaps}</arg> + <arg>-sourceCluster</arg> + <arg>${sourceCluster}</arg> + <arg>-sourceMetastoreUri</arg> + <arg>${sourceMetastoreUri}</arg> + <arg>-sourceHiveServer2Uri</arg> + <arg>${sourceHiveServer2Uri}</arg> + 
<arg>-sourceDatabase</arg> + <arg>${sourceDatabase}</arg> + <arg>-sourceTable</arg> + <arg>${sourceTable}</arg> + <arg>-sourceStagingPath</arg> + <arg>${sourceStagingPath}</arg> + <arg>-sourceNN</arg> + <arg>${sourceNN}</arg> + <arg>-targetCluster</arg> + <arg>${targetCluster}</arg> + <arg>-targetMetastoreUri</arg> + <arg>${targetMetastoreUri}</arg> + <arg>-targetHiveServer2Uri</arg> + <arg>${targetHiveServer2Uri}</arg> + <arg>-targetStagingPath</arg> + <arg>${targetStagingPath}</arg> + <arg>-targetNN</arg> + <arg>${targetNN}</arg> + <arg>-maxEvents</arg> + <arg>${maxEvents}</arg> + <arg>-distcpMapBandwidth</arg> + <arg>${distcpMapBandwidth}</arg> + <arg>-clusterForJobRun</arg> + <arg>${clusterForJobRun}</arg> + <arg>-clusterForJobRunWriteEP</arg> + <arg>${clusterForJobRunWriteEP}</arg> + <arg>-tdeEncryptionEnabled</arg> + <arg>${tdeEncryptionEnabled}</arg> + <arg>-jobName</arg> + <arg>${jobName}-${nominalTime}</arg> + <arg>-executionStage</arg> + <arg>export</arg> + <arg>-counterLogDir</arg> + <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}/</arg> + </java> + <ok to="import-dr-replication"/> + <error to="fail"/> + </action> + <!-- Import Replication action --> + <action name="import-dr-replication"> + <java> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <configuration> + <property> <!-- hadoop 2 parameter --> + <name>oozie.launcher.mapreduce.job.user.classpath.first</name> + <value>true</value> + </property> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>oozie.launcher.mapred.job.priority</name> + <value>${jobPriority}</value> + </property> + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.java</name> + <value>distcp,hive,hive2,hcatalog</value> + </property> + </configuration> + <main-class>org.apache.falcon.hive.HiveDRTool</main-class> + <arg>-Dmapred.job.queue.name=${queueName}</arg> + <arg>-Dmapred.job.priority=${jobPriority}</arg> + <arg>-falconLibPath</arg> + <arg>${wf:conf("falcon.libpath")}</arg> + <arg>-replicationMaxMaps</arg> + <arg>${replicationMaxMaps}</arg> + <arg>-distcpMaxMaps</arg> + <arg>${distcpMaxMaps}</arg> + <arg>-sourceCluster</arg> + <arg>${sourceCluster}</arg> + <arg>-sourceMetastoreUri</arg> + <arg>${sourceMetastoreUri}</arg> + <arg>-sourceHiveServer2Uri</arg> + <arg>${sourceHiveServer2Uri}</arg> + <arg>-sourceDatabase</arg> + <arg>${sourceDatabase}</arg> + <arg>-sourceTable</arg> + <arg>${sourceTable}</arg> + <arg>-sourceStagingPath</arg> + <arg>${sourceStagingPath}</arg> + <arg>-sourceNN</arg> + <arg>${sourceNN}</arg> + <arg>-targetCluster</arg> + <arg>${targetCluster}</arg> + <arg>-targetMetastoreUri</arg> + <arg>${targetMetastoreUri}</arg> + <arg>-targetHiveServer2Uri</arg> + <arg>${targetHiveServer2Uri}</arg> + <arg>-targetStagingPath</arg> + <arg>${targetStagingPath}</arg> + <arg>-targetNN</arg> + <arg>${targetNN}</arg> + <arg>-maxEvents</arg> + <arg>${maxEvents}</arg> + <arg>-distcpMapBandwidth</arg> + <arg>${distcpMapBandwidth}</arg> + <arg>-clusterForJobRun</arg> + <arg>${clusterForJobRun}</arg> + <arg>-clusterForJobRunWriteEP</arg> + <arg>${clusterForJobRunWriteEP}</arg> + <arg>-tdeEncryptionEnabled</arg> + <arg>${tdeEncryptionEnabled}</arg> + <arg>-jobName</arg> + <arg>${jobName}-${nominalTime}</arg> + <arg>-executionStage</arg> + <arg>import</arg> + </java> + <ok to="end"/> + <error to="fail"/> + </action> + <kill name="fail"> + 
+        <message>
+            Workflow action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+        </message>
+    </kill>
+    <end name="end"/>
+</workflow-app>

http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java
----------------------------------------------------------------------
diff --git a/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java b/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java
index c9ad47e..71b9043 100644
--- a/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java
+++ b/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java
@@ -32,7 +32,7 @@ public enum HiveDRArgs {
     SOURCE_HS2_URI("sourceHiveServer2Uri", "source HS2 uri"),
     SOURCE_DATABASE("sourceDatabase", "comma source databases"),
     SOURCE_TABLE("sourceTable", "comma source tables"),
-    SOURCE_STAGING_PATH("sourceStagingPath", "source staging path for data"),
+    SOURCE_STAGING_PATH("sourceStagingPath", "source staging path for data", false),
 
     // source hadoop endpoints
     SOURCE_NN("sourceNN", "source name node"),
@@ -47,7 +47,7 @@
 
     TARGET_METASTORE_URI("targetMetastoreUri", "source meta store uri"),
     TARGET_HS2_URI("targetHiveServer2Uri", "source meta store uri"),
-    TARGET_STAGING_PATH("targetStagingPath", "source staging path for data"),
+    TARGET_STAGING_PATH("targetStagingPath", "source staging path for data", false),
 
     // target hadoop endpoints
     TARGET_NN("targetNN", "target name node"),
@@ -70,16 +70,13 @@
 
     // Map Bandwidth
     DISTCP_MAP_BANDWIDTH("distcpMapBandwidth", "map bandwidth in mb", false),
 
-    JOB_NAME("drJobName", "unique job name"),
+    JOB_NAME("jobName", "unique job name"),
     CLUSTER_FOR_JOB_RUN("clusterForJobRun", "cluster where job runs"),
     JOB_CLUSTER_NN("clusterForJobRunWriteEP", "write end point of cluster where job runs"),
     JOB_CLUSTER_NN_KERBEROS_PRINCIPAL("clusterForJobNNKerberosPrincipal",
             "Namenode kerberos principal of cluster on which replication job runs", false),
-
-    FALCON_LIBPATH("falconLibPath", "Falcon Lib Path for Jar files", false),
-
     KEEP_HISTORY("keepHistory", "Keep history of events file generated", false),
     EXECUTION_STAGE("executionStage", "Flag for workflow stage execution", false),
     COUNTER_LOGDIR("counterLogDir", "Log directory to store counter file", false);
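
The staging-path options above gain a third constructor argument that marks them optional instead of required. For readers unfamiliar with the pattern, here is a minimal sketch of how such an enum flag commonly feeds Apache Commons CLI; the DrArg enum and its helper names are assumptions for illustration, not the actual HiveDRArgs code:

    import org.apache.commons.cli.Option;
    import org.apache.commons.cli.Options;

    // Illustrative stand-in for an args enum with a "required" flag like the
    // third parameter added to SOURCE_STAGING_PATH/TARGET_STAGING_PATH above.
    enum DrArg {
        SOURCE_STAGING_PATH("sourceStagingPath", "source staging path for data", false),
        SOURCE_NN("sourceNN", "source name node");  // two-arg form stays required

        private final String name;
        private final String description;
        private final boolean isRequired;

        DrArg(String name, String description) {
            this(name, description, true);          // default: option is required
        }

        DrArg(String name, String description, boolean isRequired) {
            this.name = name;
            this.description = description;
            this.isRequired = isRequired;
        }

        Option createOption() {
            Option option = new Option(name, true, description); // hasArg = true
            option.setRequired(isRequired);
            return option;
        }

        static Options allOptions() {
            Options options = new Options();
            for (DrArg arg : values()) {
                options.addOption(arg.createOption());
            }
            return options;
        }
    }

With this shape, flipping a single constructor argument is all it takes to let the workflow omit the staging paths, which is what enables the server-side fallback below.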
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java
----------------------------------------------------------------------
diff --git a/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java b/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java
index 868ec8d..0096727 100644
--- a/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java
+++ b/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java
@@ -24,7 +24,7 @@ import org.apache.commons.cli.Option;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.falcon.hive.exception.HiveReplicationException;
+import org.apache.falcon.hive.util.FileUtils;
 
 import java.io.File;
 import java.util.Arrays;
@@ -70,11 +70,14 @@ public class HiveDROptions {
         return Arrays.asList(context.get(HiveDRArgs.SOURCE_TABLE).trim().split(","));
     }
 
-    public String getSourceStagingPath() throws HiveReplicationException {
-        if (StringUtils.isNotEmpty(context.get(HiveDRArgs.SOURCE_STAGING_PATH))) {
-            return context.get(HiveDRArgs.SOURCE_STAGING_PATH) + File.separator + getJobName();
+    public String getSourceStagingPath() {
+        String stagingPath = context.get(HiveDRArgs.SOURCE_STAGING_PATH);
+        if (StringUtils.isNotBlank(stagingPath)) {
+            stagingPath = StringUtils.removeEnd(stagingPath, File.separator);
+            return stagingPath + File.separator + getJobName();
+        } else {
+            return FileUtils.DEFAULT_EVENT_STORE_PATH + getJobName();
         }
-        throw new HiveReplicationException("Source StagingPath cannot be empty");
     }
 
     public String getSourceWriteEP() {
@@ -100,15 +103,19 @@
     public String getTargetMetastoreKerberosPrincipal() {
         return context.get(HiveDRArgs.TARGET_HIVE_METASTORE_KERBEROS_PRINCIPAL);
     }
+
     public String getTargetHive2KerberosPrincipal() {
         return context.get(HiveDRArgs.TARGET_HIVE2_KERBEROS_PRINCIPAL);
     }
 
-    public String getTargetStagingPath() throws HiveReplicationException {
-        if (StringUtils.isNotEmpty(context.get(HiveDRArgs.TARGET_STAGING_PATH))) {
-            return context.get(HiveDRArgs.TARGET_STAGING_PATH) + File.separator + getJobName();
+    public String getTargetStagingPath() {
+        String stagingPath = context.get(HiveDRArgs.TARGET_STAGING_PATH);
+        if (StringUtils.isNotBlank(stagingPath)) {
+            stagingPath = StringUtils.removeEnd(stagingPath, File.separator);
+            return stagingPath + File.separator + getJobName();
+        } else {
+            return FileUtils.DEFAULT_EVENT_STORE_PATH + getJobName();
         }
-        throw new HiveReplicationException("Target StagingPath cannot be empty");
     }
 
     public String getReplicationMaxMaps() {
@@ -135,23 +142,10 @@
         return context.get(HiveDRArgs.JOB_CLUSTER_NN_KERBEROS_PRINCIPAL);
     }
 
-    public void setSourceStagingDir(String path) {
-        context.put(HiveDRArgs.SOURCE_STAGING_PATH, path);
-    }
-
-    public void setTargetStagingDir(String path) {
-        context.put(HiveDRArgs.TARGET_STAGING_PATH, path);
-    }
-
     public String getExecutionStage() {
         return context.get(HiveDRArgs.EXECUTION_STAGE);
     }
 
-    public boolean isTDEEncryptionEnabled() {
-        return StringUtils.isEmpty(context.get(HiveDRArgs.TDE_ENCRYPTION_ENABLED))
-                ? false : Boolean.valueOf(context.get(HiveDRArgs.TDE_ENCRYPTION_ENABLED));
-    }
-
     public boolean shouldBlock() {
         return true;
     }
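
The rewritten getters above normalize a trailing separator and fall back to a default event-store location instead of throwing when the staging path is blank. A self-contained sketch of that resolution logic; the DEFAULT_EVENT_STORE_PATH value and the job name below are placeholders standing in for FileUtils.DEFAULT_EVENT_STORE_PATH and the real job name:

    import java.io.File;
    import org.apache.commons.lang3.StringUtils;

    public final class StagingPathDemo {
        // placeholder for FileUtils.DEFAULT_EVENT_STORE_PATH (assumed to end with a separator)
        private static final String DEFAULT_EVENT_STORE_PATH = "/apps/falcon/extensions/mirroring/";

        static String resolve(String stagingPath, String jobName) {
            if (StringUtils.isNotBlank(stagingPath)) {
                // strip a trailing "/" so "/tmp/staging/" and "/tmp/staging" resolve identically
                stagingPath = StringUtils.removeEnd(stagingPath, File.separator);
                return stagingPath + File.separator + jobName;
            }
            // blank path: fall back to the default event store instead of throwing
            return DEFAULT_EVENT_STORE_PATH + jobName;
        }

        public static void main(String[] args) {
            System.out.println(resolve("/tmp/staging/", "hive-dr-job")); // /tmp/staging/hive-dr-job
            System.out.println(resolve("", "hive-dr-job"));              // default path + job name
        }
    }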
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/recipes/hdfs-replication/README.txt
----------------------------------------------------------------------
diff --git a/addons/recipes/hdfs-replication/README.txt b/addons/recipes/hdfs-replication/README.txt
deleted file mode 100644
index 5742d43..0000000
--- a/addons/recipes/hdfs-replication/README.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-HDFS Directory Replication Recipe
-
-Overview
-This recipe implements replicating arbitrary directories on HDFS from one
-Hadoop cluster to another Hadoop cluster.
-This piggy backs on replication solution in Falcon which uses the DistCp tool.
-
-Use Case
-* Copy directories between HDFS clusters with out dated partitions
-* Archive directories from HDFS to Cloud. Ex: S3, Azure WASB
-
-Limitations
-As the data volume and number of files grow, this can get inefficient.

http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/recipes/hdfs-replication/pom.xml
----------------------------------------------------------------------
diff --git a/addons/recipes/hdfs-replication/pom.xml b/addons/recipes/hdfs-replication/pom.xml
deleted file mode 100644
index 98d9795..0000000
--- a/addons/recipes/hdfs-replication/pom.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-  -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-
-    <modelVersion>4.0.0</modelVersion>
-    <groupId>org.apache.falcon.recipes</groupId>
-    <artifactId>falcon-hdfs-replication-recipe</artifactId>
-    <version>0.10-SNAPSHOT</version>
-    <description>Apache Falcon Sample Hdfs Replicaiton Recipe</description>
-    <name>Apache Falcon Sample Hdfs Replication Recipe</name>
-    <packaging>jar</packaging>
-</project>
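
The deleted README above notes that the recipe piggybacks on Falcon's DistCp-based replication. For orientation, a rough sketch of the same directory copy driven directly through the Hadoop DistCp Java API; the endpoints, paths, and tuning values are invented for illustration, and the option setters assume the Hadoop 2.x DistCpOptions API:

    import java.util.Collections;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.tools.DistCp;
    import org.apache.hadoop.tools.DistCpOptions;

    public final class MirrorDirDemo {
        public static void main(String[] args) throws Exception {
            // invented endpoints and paths, for illustration only
            Path source = new Path("hdfs://source-nn:8020/data/in");
            Path target = new Path("hdfs://target-nn:8020/data/in");

            DistCpOptions options = new DistCpOptions(Collections.singletonList(source), target);
            options.setMaxMaps(5);          // corresponds to distcpMaxMaps in the workflow
            options.setMapBandwidth(100);   // corresponds to distcpMapBandwidth, in MB

            new DistCp(new Configuration(), options).execute(); // blocks until the copy finishes
        }
    }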
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-template.xml
----------------------------------------------------------------------
diff --git a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-template.xml b/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-template.xml
deleted file mode 100644
index 441a189..0000000
--- a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-template.xml
+++ /dev/null
@@ -1,44 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-  -->
-
-<process name="##falcon.recipe.job.name##" xmlns="uri:falcon:process:0.1">
-    <clusters>
-        <!-- source -->
-        <cluster name="##falcon.recipe.cluster.name##">
-            <validity end="##falcon.recipe.cluster.validity.end##" start="##falcon.recipe.cluster.validity.start##"/>
-        </cluster>
-    </clusters>
-
-    <tags>_falcon_mirroring_type=HDFS</tags>
-
-    <parallel>1</parallel>
-    <!-- Dir replication needs to run only once to catch up -->
-    <order>LAST_ONLY</order>
-    <frequency>##falcon.recipe.frequency##</frequency>
-    <timezone>UTC</timezone>
-
-    <properties>
-        <property name="oozie.wf.subworkflow.classpath.inheritance" value="true"/>
-    </properties>
-
-    <workflow name="##falcon.recipe.workflow.name##" engine="oozie" path="/apps/data-mirroring/workflows/hdfs-replication-workflow.xml" lib="##workflow.lib.path##"/>
-    <retry policy="##falcon.recipe.retry.policy##" delay="##falcon.recipe.retry.delay##" attempts="3"/>
-    <notification type="##falcon.recipe.notification.type##" to="##falcon.recipe.notification.receivers##"/>
-    <ACL/>
-</process>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-workflow.xml
----------------------------------------------------------------------
diff --git a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-workflow.xml b/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-workflow.xml
deleted file mode 100644
index c1966be..0000000
--- a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-workflow.xml
+++ /dev/null
@@ -1,82 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-  -->
-<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-dr-fs-workflow'>
-    <start to='dr-replication'/>
-    <!-- Replication action -->
-    <action name="dr-replication">
-        <java>
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <configuration>
-                <property> <!-- hadoop 2 parameter -->
-                    <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
-                    <value>true</value>
-                </property>
-                <property>
-                    <name>mapred.job.queue.name</name>
-                    <value>${queueName}</value>
-                </property>
-                <property>
-                    <name>oozie.launcher.mapred.job.priority</name>
-                    <value>${jobPriority}</value>
-                </property>
-                <property>
-                    <name>oozie.use.system.libpath</name>
-                    <value>true</value>
-                </property>
-                <property>
-                    <name>oozie.action.sharelib.for.java</name>
-                    <value>distcp</value>
-                </property>
-                <property>
-                    <name>oozie.launcher.oozie.libpath</name>
-                    <value>${wf:conf("falcon.libpath")}</value>
-                </property>
-                <property>
-                    <name>oozie.launcher.mapreduce.job.hdfs-servers</name>
-                    <value>${drSourceClusterFS},${drTargetClusterFS}</value>
-                </property>
-            </configuration>
-            <main-class>org.apache.falcon.replication.FeedReplicator</main-class>
-            <arg>-Dmapred.job.queue.name=${queueName}</arg>
-            <arg>-Dmapred.job.priority=${jobPriority}</arg>
-            <arg>-maxMaps</arg>
-            <arg>${distcpMaxMaps}</arg>
-            <arg>-mapBandwidth</arg>
-            <arg>${distcpMapBandwidth}</arg>
-            <arg>-sourcePaths</arg>
-            <arg>${drSourceDir}</arg>
-            <arg>-targetPath</arg>
-            <arg>${drTargetClusterFS}${drTargetDir}</arg>
-            <arg>-falconFeedStorageType</arg>
-            <arg>FILESYSTEM</arg>
-            <arg>-availabilityFlag</arg>
-            <arg>${availabilityFlag == 'NA' ? "NA" : availabilityFlag}</arg>
-            <arg>-counterLogDir</arg>
-            <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}</arg>
-        </java>
-        <ok to="end"/>
-        <error to="fail"/>
-    </action>
-    <kill name="fail">
-        <message>
-            Workflow action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-        </message>
-    </kill>
-    <end name="end"/>
-</workflow-app>
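
This workflow (carried forward under addons/extensions in this change, per the file list) resolves all of its ${...} parameters at submission time. A hypothetical submission through the Oozie client API showing where those values would come from; every endpoint and path below is a placeholder, and in practice Falcon injects these properties when it schedules the mirroring process:

    import java.util.Properties;
    import org.apache.oozie.client.OozieClient;

    public final class SubmitHdfsDrDemo {
        public static void main(String[] args) throws Exception {
            // placeholder Oozie endpoint
            OozieClient oozie = new OozieClient("http://oozie-host:11000/oozie");

            Properties conf = oozie.createConfiguration();
            conf.setProperty(OozieClient.APP_PATH, "hdfs://nn:8020/apps/data-mirroring/workflows");
            conf.setProperty("jobTracker", "rm-host:8032");
            conf.setProperty("nameNode", "hdfs://nn:8020");
            conf.setProperty("queueName", "default");
            conf.setProperty("jobPriority", "NORMAL");
            conf.setProperty("distcpMaxMaps", "5");
            conf.setProperty("distcpMapBandwidth", "100");
            conf.setProperty("drSourceDir", "/data/in");
            conf.setProperty("drSourceClusterFS", "hdfs://source-nn:8020");
            conf.setProperty("drTargetDir", "/data/in");
            conf.setProperty("drTargetClusterFS", "hdfs://target-nn:8020");
            conf.setProperty("availabilityFlag", "NA");
            conf.setProperty("logDir", "/tmp/falcon-dr-logs");
            conf.setProperty("nominalTime", "2016-04-12-16-05");
            conf.setProperty("srcClusterName", "NA");

            String jobId = oozie.run(conf);   // submits and starts the workflow
            System.out.println("Workflow job id: " + jobId);
        }
    }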
