Repository: falcon Updated Branches: refs/heads/master 86fe67f53 -> fd10aa410
FALCON-671 Add a test in falcon-regression where one oozie workflow has various actions like mr, hive, pig. Contributed by Karishma Gulati Project: http://git-wip-us.apache.org/repos/asf/falcon/repo Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/fd10aa41 Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/fd10aa41 Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/fd10aa41 Branch: refs/heads/master Commit: fd10aa4106a2706b8e1f61dd17291338eec8f722 Parents: 86fe67f Author: Ruslan Ostafiychuk <[email protected]> Authored: Thu Feb 12 14:26:59 2015 +0200 Committer: Ruslan Ostafiychuk <[email protected]> Committed: Thu Feb 12 14:26:59 2015 +0200 ---------------------------------------------------------------------- falcon-regression/CHANGES.txt | 3 + .../falcon/regression/core/util/BundleUtil.java | 4 + .../falcon/regression/core/util/HCatUtil.java | 22 ++ .../falcon/regression/core/util/OSUtil.java | 2 + .../falcon/regression/CombinedActionsTest.java | 210 +++++++++++++++++++ .../resources/combinedActions/cluster-0.1.xml | 48 +++++ .../combinedActions/feed-template1.xml | 51 +++++ .../combinedActions/feed-template1_hcat.xml | 41 ++++ .../combinedActions/feed-template2.xml | 51 +++++ .../combinedActions/feed-template2_hcat.xml | 47 +++++ .../combinedActions/feed-template3.xml | 51 +++++ .../resources/combinedActions/process-agg.xml | 51 +++++ .../src/test/resources/combinedWorkflow/id.pig | 20 ++ .../test/resources/combinedWorkflow/script.hql | 19 ++ .../resources/combinedWorkflow/workflow.xml | 110 ++++++++++ 15 files changed, 730 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/CHANGES.txt ---------------------------------------------------------------------- diff --git a/falcon-regression/CHANGES.txt b/falcon-regression/CHANGES.txt index 6351efd..a88fb85 100644 --- a/falcon-regression/CHANGES.txt +++ b/falcon-regression/CHANGES.txt @@ -6,6 +6,9 @@ Trunk (Unreleased) NEW FEATURES + FALCON-671 Add a test in falcon-regression where one oozie workflow has various actions like + mr, hive, pig (Karishma Gulati via Ruslan Ostafiychuk) + FALCON-985 Upgrading jsch version in falcon regression pom (Pragya M via Samarth) FALCON-884 Add option to dump xmls generated by falcon (Raghav Kumar Gautam via Ruslan http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java index 6e25a60..4218815 100644 --- a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java +++ b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java @@ -83,6 +83,10 @@ public final class BundleUtil { return readBundleFromFolder("updateBundle"); } + public static Bundle readCombinedActionsBundle() throws IOException { + return readBundleFromFolder("combinedActions"); + } + private static Bundle readBundleFromFolder(final String folderPath) throws IOException { LOGGER.info("Loading xmls from directory: " + folderPath); File directory = null; http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java index d878ecb..3ac0e6a 100644 --- a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java +++ b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java @@ -21,10 +21,17 @@ package org.apache.falcon.regression.core.util; import org.apache.falcon.regression.core.enumsAndConstants.MerlinConstants; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hive.hcatalog.api.HCatAddPartitionDesc; import org.apache.hive.hcatalog.api.HCatClient; import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.testng.Assert; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * util methods for HCat. @@ -50,6 +57,21 @@ public final class HCatUtil { return HCatClient.create(hcatConf); } + public static void addPartitionsToTable(HCatClient clusterHC, List<String> partitions, + List<String> partitionLocations, String partitionCol, String dbName, String tableName) throws HCatException { + Assert.assertEquals(partitions.size(), partitionLocations.size(), + "Number of locations is not same as number of partitions."); + final List<HCatAddPartitionDesc> partitionDesc = new ArrayList<HCatAddPartitionDesc>(); + for (int i = 0; i < partitions.size(); ++i) { + final String partition = partitions.get(i); + final Map<String, String> onePartition = new HashMap<String, String>(); + onePartition.put(partitionCol, partition); + final String partitionLoc = partitionLocations.get(i); + partitionDesc.add(HCatAddPartitionDesc.create(dbName, tableName, partitionLoc, onePartition).build()); + } + clusterHC.addPartitions(partitionDesc); + } + @SuppressWarnings("deprecation") public static HCatFieldSchema getStringSchema(String fieldName, String comment) throws HCatException { return new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRING, comment); http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java index ed29d07..dd5e194 100644 --- a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java +++ b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java @@ -44,6 +44,8 @@ public final class OSUtil { String.format(OOZIE_EXAMPLE_INPUT_DATA + "normalInput%s", SEPARATOR); public static final String SINGLE_FILE = String.format(OOZIE_EXAMPLE_INPUT_DATA + "SingleFile%s", SEPARATOR); + public static final String OOZIE_COMBINED_ACTIONS = + String.format(RESOURCES + "combinedWorkflow%s", SEPARATOR); public static String getPath(String... pathParts) { return StringUtils.join(pathParts, OSUtil.SEPARATOR); http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/CombinedActionsTest.java ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/CombinedActionsTest.java b/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/CombinedActionsTest.java new file mode 100644 index 0000000..a0e1927 --- /dev/null +++ b/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/CombinedActionsTest.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.falcon.regression; + +import org.apache.commons.lang.StringUtils; +import org.apache.falcon.entity.v0.EntityType; +import org.apache.falcon.entity.v0.Frequency; +import org.apache.falcon.entity.v0.feed.LocationType; +import org.apache.falcon.regression.Entities.FeedMerlin; +import org.apache.falcon.regression.core.bundle.Bundle; +import org.apache.falcon.regression.core.helpers.ColoHelper; +import org.apache.falcon.regression.core.util.*; +import org.apache.falcon.regression.testHelper.BaseTestClass; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hive.hcatalog.api.HCatClient; +import org.apache.hive.hcatalog.api.HCatCreateTableDesc; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.log4j.Logger; +import org.apache.oozie.client.CoordinatorAction; +import org.apache.oozie.client.OozieClient; +import org.joda.time.format.DateTimeFormat; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.List; + +/** + * Test where a single workflow contains multiple actions. + */ + +public class CombinedActionsTest extends BaseTestClass { + + private ColoHelper cluster = servers.get(0); + private FileSystem clusterFS = serverFS.get(0); + private OozieClient clusterOC = serverOC.get(0); + private HCatClient clusterHC; + + private final String hiveTestDir = "/HiveData"; + private final String baseTestHDFSDir = cleanAndGetTestDir() + hiveTestDir; + private final String inputHDFSDir = baseTestHDFSDir + "/input"; + private final String outputHDFSDir = baseTestHDFSDir + "/output"; + private String aggregateWorkflowDir = cleanAndGetTestDir() + "/aggregator"; + private static final Logger LOGGER = Logger.getLogger(CombinedActionsTest.class); + private static final String HCATDIR = OSUtil.getPath("src", "test", "resources", "hcat"); + private static final String LOCALHCATDATA = OSUtil.getPath(HCATDIR, "data"); + public static final String DBNAME = "default"; + public static final String COL1NAME = "id"; + public static final String COL2NAME = "value"; + public static final String PARTITIONCOLUMN = "dt"; + private final String inputTableName = "combinedactionstest_input_table"; + private final String outputTableName = "combinedactionstest_output_table"; + + private String pigMrTestDir = cleanAndGetTestDir() + "/pigMrData"; + private String inputPath = pigMrTestDir + "/input/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"; + private String outputPathPig = pigMrTestDir + "/output/pig/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"; + private String outputPathMr = pigMrTestDir + "/output/mr/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"; + + @BeforeClass(alwaysRun = true) + public void uploadWorkflow() throws Exception { + + HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.OOZIE_COMBINED_ACTIONS); + } + + @BeforeMethod(alwaysRun = true) + public void setUp(Method method) throws Exception { + LOGGER.info("test name: " + method.getName()); + clusterHC = cluster.getClusterHelper().getHCatClient(); + bundles[0] = BundleUtil.readCombinedActionsBundle(); + bundles[0] = new Bundle(bundles[0], cluster); + bundles[0].generateUniqueBundle(this); + AssertUtil.assertSucceeded(prism.getClusterHelper().submitEntity(bundles[0].getClusters().get(0))); + } + + @AfterMethod(alwaysRun = true) + public void tearDown()throws Exception { + removeTestClassEntities(); + clusterHC.dropTable(DBNAME, inputTableName, true); + clusterHC.dropTable(DBNAME, outputTableName, true); + HadoopUtil.deleteDirIfExists(pigMrTestDir, clusterFS); + } + + /** + *Schedule a process, for which the oozie workflow contains multiple actions like hive, mr, pig + *The process should succeed. Fails right now due to: https://issues.apache.org/jira/browse/FALCON-670 + * + * @throws Exception + */ + + @Test + public void combinedMrPigHiveAction()throws Exception{ + + //create data for pig, mr and hcat jobs + final String startDate = "2010-01-01T20:00Z"; + final String endDate = "2010-01-02T04:00Z"; + + String inputFeedMrPig = bundles[0].getFeed("sampleFeed1"); + FeedMerlin feedObj = new FeedMerlin(inputFeedMrPig); + + HadoopUtil.deleteDirIfExists(pigMrTestDir + "/input", clusterFS); + List<String> dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, endDate, 20); + + HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.NORMAL_INPUT, pigMrTestDir + "/input", dataDates); + + final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH", "mm"}, "-"); + dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, endDate, 60, DateTimeFormat.forPattern(datePattern)); + + final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, LOCALHCATDATA, + inputHDFSDir, dataDates); + + ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>(); + cols.add(HCatUtil.getStringSchema(COL1NAME, COL1NAME + " comment")); + cols.add(HCatUtil.getStringSchema(COL2NAME, COL2NAME + " comment")); + ArrayList<HCatFieldSchema> partitionCols = new ArrayList<HCatFieldSchema>(); + + partitionCols.add(HCatUtil.getStringSchema(PARTITIONCOLUMN, PARTITIONCOLUMN + " partition")); + clusterHC.createTable(HCatCreateTableDesc + .create(DBNAME, inputTableName, cols) + .partCols(partitionCols) + .ifNotExists(true) + .isTableExternal(true) + .location(inputHDFSDir) + .build()); + + clusterHC.createTable(HCatCreateTableDesc + .create(DBNAME, outputTableName, cols) + .partCols(partitionCols) + .ifNotExists(true) + .isTableExternal(true) + .location(outputHDFSDir) + .build()); + + HCatUtil.addPartitionsToTable(clusterHC, dataDates, dataset, "dt", DBNAME, inputTableName); + + final String tableUriPartitionFragment = StringUtils.join( + new String[]{"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}", "${MINUTE}"}, "-"); + String inputTableUri = + "catalog:" + DBNAME + ":" + inputTableName + tableUriPartitionFragment; + String outputTableUri = + "catalog:" + DBNAME + ":" + outputTableName + tableUriPartitionFragment; + + //Set input and output feeds for bundle + //input feed for both mr and pig jobs + feedObj.setLocation(LocationType.DATA, inputPath); + LOGGER.info(feedObj.toString()); + AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString())); + + //output feed for pig jobs + String outputFeedPig = bundles[0].getFeed("sampleFeed2"); + feedObj = new FeedMerlin(outputFeedPig); + feedObj.setLocation(LocationType.DATA, outputPathPig); + feedObj.setFrequency(new Frequency("5", Frequency.TimeUnit.minutes)); + AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString())); + + //output feed for mr jobs + String outputFeedMr = bundles[0].getFeed("sampleFeed3"); + feedObj = new FeedMerlin(outputFeedMr); + feedObj.setLocation(LocationType.DATA, outputPathMr); + AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString())); + + //input feed for hcat jobs + String inputHive = bundles[0].getFeed("sampleFeedHCat1"); + feedObj = new FeedMerlin(inputHive); + feedObj.getTable().setUri(inputTableUri); + feedObj.setFrequency(new Frequency("1", Frequency.TimeUnit.hours)); + feedObj.getClusters().getClusters().get(0).getValidity() + .setStart(TimeUtil.oozieDateToDate(startDate).toDate()); + feedObj.getClusters().getClusters().get(0).getValidity() + .setEnd(TimeUtil.oozieDateToDate(endDate).toDate()); + AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString())); + + //output feed for hcat jobs + String outputHive = bundles[0].getFeed("sampleFeedHCat2"); + feedObj = new FeedMerlin(outputHive); + feedObj.getTable().setUri(outputTableUri); + feedObj.setFrequency(new Frequency("1", Frequency.TimeUnit.hours)); + feedObj.getClusters().getClusters().get(0).getValidity() + .setStart(TimeUtil.oozieDateToDate(startDate).toDate()); + feedObj.getClusters().getClusters().get(0).getValidity() + .setEnd(TimeUtil.oozieDateToDate(endDate).toDate()); + AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString())); + + bundles[0].setProcessWorkflow(aggregateWorkflowDir); + bundles[0].setProcessValidity(startDate, endDate); + bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours); + bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)"); + AssertUtil.assertSucceeded(prism.getProcessHelper().submitAndSchedule(bundles[0].getProcessData())); + InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), + 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS); + } +} http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/cluster-0.1.xml ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/cluster-0.1.xml b/falcon-regression/merlin/src/test/resources/combinedActions/cluster-0.1.xml new file mode 100644 index 0000000..40394e7 --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedActions/cluster-0.1.xml @@ -0,0 +1,48 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + +<cluster colo="ua1" description="" name="corp" xmlns="uri:falcon:cluster:0.1" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <interfaces> + <interface type="readonly" endpoint="http://host:50070" + version="0.20.2" /> + <interface type="write" endpoint="hdfs://host:54310" + version="0.20.2" /> + <interface type="execute" endpoint="hdfs://host:54311" + version="0.20.2" /> + <interface type="workflow" + endpoint="http://host:11000/oozie/" version="3.1" /> + <interface type="messaging" + endpoint="tcp://host:61616?daemon=true" + version="5.1.6" /> + <interface type="registry" endpoint="thrift://host:14003" + version="0.11.0" /> + + </interfaces> + <locations> + <location name="staging" path="/projects/falcon/staging" /> + <location name="temp" path="/tmp" /> + <location name="working" path="/projectsTest/falcon/working" /> + </locations> + <properties> + <property name="hive.metastore.client.socket.timeout" value="120"/> + <property name="field1" value="value1" /> + <property name="field2" value="value2" /> + </properties> +</cluster> http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1.xml ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1.xml new file mode 100644 index 0000000..e1eb173 --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> +<feed description="clicks log" name="sampleFeed1" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <partitions> + <partition name="country" /> + <partition name="colo" /> + </partitions> +<!-- + <groups>online,bi</groups>--> + + <frequency>minutes(20)</frequency> + <timezone>UTC</timezone> + <late-arrival cut-off="hours(6)" /> + + <clusters> + <cluster name="corp" type="source"> + <validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z" + /> + <retention limit="months(9000)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE --> + </cluster> + </clusters> + + <locations> + <location type="data" path="/test/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}" /> + <location type="stats" path="/projects/falcon/clicksStats" /> + <location type="meta" path="/projects/falcon/clicksMetaData" /> + </locations> + <ACL owner="testuser" group="group" permission="0x755" /> + <schema location="/schema/clicks" provider="protobuf" /> + + <properties> + <property name="field1" value="value1" /> + <property name="field2" value="value2" /> + </properties> +</feed> http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1_hcat.xml ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1_hcat.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1_hcat.xml new file mode 100644 index 0000000..e85a2e4 --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1_hcat.xml @@ -0,0 +1,41 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> +<feed description="clicks log" name="sampleFeedHCat1" xmlns="uri:falcon:feed:0.1" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <frequency>minutes(20)</frequency> + <timezone>UTC</timezone> + <late-arrival cut-off="hours(6)" /> + + <clusters> + <cluster name="corp" type="source"> + <validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z" /> + <retention limit="months(9000)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE --> + </cluster> + </clusters> + + <table + uri="catalog:default:mytablepart3#year=${YEAR};month=${MONTH};day=${DAY};hour=${HOUR}" /> + <ACL owner="testuser" group="group" permission="0x755" /> + <schema location="hcat" provider="hcat" /> + + <properties> + <property name="field1" value="value1" /> + <property name="field2" value="value2" /> + </properties> +</feed> http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2.xml ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2.xml new file mode 100644 index 0000000..0d40de6 --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> +<feed description="clicks log" name="sampleFeed2" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> +<!-- <partitions> + <partition name="fraud" /> + <partition name="good" /> + </partitions> + + <groups>online,bi</groups>--> + + <frequency>hours(1)</frequency> +<timezone>UTC</timezone> + <late-arrival cut-off="hours(6)" /> + + <clusters> + <cluster name="corp" type="source"> + <validity start="2009-02-01T01:00Z" end="2099-05-01T00:00Z" + /> + <retention limit="hours(6)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE --> + </cluster> + </clusters> + + <locations> + <location type="data" path="/examples/output-data/aggregator/aggregatedLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}" /> + <location type="stats" path="/projects/falcon/clicksStats" /> + <location type="meta" path="/projects/falcon/clicksMetaData" /> + </locations> + <ACL owner="testuser" group="group" permission="0x755" /> + <schema location="/schema/clicks" provider="protobuf" /> + + <properties> + <property name="field1" value="value1" /> + <property name="field2" value="value2" /> + </properties> +</feed> http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2_hcat.xml ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2_hcat.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2_hcat.xml new file mode 100644 index 0000000..cdc396c --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2_hcat.xml @@ -0,0 +1,47 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> +<feed description="clicks log" name="sampleFeedHCat2" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> +<!-- <partitions> + <partition name="fraud" /> + <partition name="good" /> + </partitions> + + <groups>online,bi</groups>--> + + <frequency>hours(1)</frequency> +<timezone>UTC</timezone> + <late-arrival cut-off="hours(6)" /> + + <clusters> + <cluster name="corp" type="source"> + <validity start="2009-02-01T01:00Z" end="2099-05-01T00:00Z" + /> + <retention limit="hours(6)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE --> + </cluster> + </clusters> + + <table uri="catalog:default:output_table#year=${YEAR};month=${MONTH};day=${DAY};hour=${HOUR}" /> + <ACL owner="testuser" group="group" permission="0x755" /> + <schema location="hcat" provider="hcat" /> + + <properties> + <property name="field1" value="value1" /> + <property name="field2" value="value2" /> + </properties> +</feed> http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template3.xml ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template3.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template3.xml new file mode 100644 index 0000000..d0698d5 --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template3.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> +<feed description="clicks log" name="sampleFeed3" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <partitions> + <partition name="country" /> + <partition name="colo" /> + </partitions> +<!-- + <groups>online,bi</groups>--> + + <frequency>minutes(20)</frequency> + <timezone>UTC</timezone> + <late-arrival cut-off="hours(6)" /> + + <clusters> + <cluster name="corp" type="source"> + <validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z" + /> + <retention limit="months(9000)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE --> + </cluster> + </clusters> + + <locations> + <location type="data" path="/test/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}" /> + <location type="stats" path="/projects/falcon/clicksStats" /> + <location type="meta" path="/projects/falcon/clicksMetaData" /> + </locations> + <ACL owner="testuser" group="group" permission="0x755" /> + <schema location="/schema/clicks" provider="protobuf" /> + + <properties> + <property name="field1" value="value1" /> + <property name="field2" value="value2" /> + </properties> +</feed> http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/process-agg.xml ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/process-agg.xml b/falcon-regression/merlin/src/test/resources/combinedActions/process-agg.xml new file mode 100644 index 0000000..25abcd9 --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedActions/process-agg.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + +<process name="aggregator-coord16" xmlns="uri:falcon:process:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <clusters> + <cluster name="corp"> + <validity end="2011-01-03T03:00Z" start="2010-01-02T01:00Z" /> + </cluster> + </clusters> + <parallel>1</parallel> + <order>FIFO</order> + <frequency>minutes(5)</frequency> + <timezone>UTC</timezone> + <inputs> + <input end="now(0,0)" start="now(0,0)" + feed="sampleFeedHCat1" name="inputData"/> + <input end="now(0,0)" start="now(0,-20)" + feed="sampleFeed1" name="INPUT"/> + </inputs> + <outputs> + <output instance="now(0,0)" feed="sampleFeedHCat2" + name="outputData"/> + <output instance="now(0,0)" feed="sampleFeed2" + name="OUTPUT" /> + <output instance="now(0,0)" feed="sampleFeed3" + name="OUTPUTMR" /> + </outputs> + <properties> + <property name="queueName" value="default"/> + <property name="fileTime" value="${formatTime(dateOffset(instanceTime(), 1, 'DAY'), 'yyyy-MMM-dd')}"/> + <property name="user" value="${user()}"/> + </properties> + <workflow path="/examples/apps/aggregator"/> + <retry policy="periodic" delay="minutes(3)" attempts="3" /> +</process> http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedWorkflow/id.pig ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedWorkflow/id.pig b/falcon-regression/merlin/src/test/resources/combinedWorkflow/id.pig new file mode 100644 index 0000000..11f227e --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedWorkflow/id.pig @@ -0,0 +1,20 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +A = load '$INPUT' using PigStorage(':'); +B = foreach A generate $0 as id; +store B into '$OUTPUT' USING PigStorage(); http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedWorkflow/script.hql ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedWorkflow/script.hql b/falcon-regression/merlin/src/test/resources/combinedWorkflow/script.hql new file mode 100644 index 0000000..203633a --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedWorkflow/script.hql @@ -0,0 +1,19 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +INSERT OVERWRITE TABLE ${falcon_outputData_database}.${falcon_outputData_table} PARTITION ${falcon_inputData_partition_filter_hive} SELECT id, value FROM ${falcon_inputData_database}.${falcon_inputData_table} WHERE ${falcon_inputData_partition_filter_hive}; http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedWorkflow/workflow.xml ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/combinedWorkflow/workflow.xml b/falcon-regression/merlin/src/test/resources/combinedWorkflow/workflow.xml new file mode 100644 index 0000000..a80719a --- /dev/null +++ b/falcon-regression/merlin/src/test/resources/combinedWorkflow/workflow.xml @@ -0,0 +1,110 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<workflow-app xmlns="uri:oozie:workflow:0.2" name="aggregator-wf"> + <start to="aggregator"/> + <action name="aggregator"> + <map-reduce> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <prepare> + <delete path="${OUTPUTMR}"/> + </prepare> + <configuration> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>mapred.mapper.class</name> + <value>org.apache.hadoop.mapred.lib.IdentityMapper</value> + </property> + <property> + <name>mapred.reducer.class</name> + <value>org.apache.hadoop.mapred.lib.IdentityReducer</value> + </property> + <property> + <name>mapred.map.tasks</name> + <value>1</value> + </property> + <property> + <name>mapred.input.dir</name> + <value>${INPUT}</value> + </property> + <property> + <name>mapred.output.dir</name> + <value>${OUTPUTMR}</value> + </property> + </configuration> + </map-reduce> + <ok to="pigAction"/> + <error to="failMapRed"/> + </action> + + <action name="pigAction"> + <pig> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <script>id.pig</script> + <param>INPUT=${INPUT}</param> + <param>OUTPUT=${OUTPUT}</param> + </pig> + <ok to="hiveAction"/> + <error to="failPig"/> + </action> + + <action name="hiveAction"> + <hive xmlns="uri:oozie:hive-action:0.2"> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + <job-xml>${wf:appPath()}/conf/hive-site.xml</job-xml> + <configuration> + <property> + <name>mapred.job.queue.name</name> + <value>${queueName}</value> + </property> + <property> + <name>oozie.launcher.mapred.job.priority</name> + <value>${jobPriority}</value> + </property> + <property> + <name>queueName</name> + <value>default</value> + </property> + </configuration> + <script>script.hql</script> + <param>falcon_inputData_database=${falcon_inputData_database}</param> + <param>falcon_outputData_database=${falcon_outputData_database}</param> + <param>falcon_inputData_table=${falcon_inputData_table}</param> + <param>falcon_outputData_table=${falcon_outputData_table}</param> + <param>falcon_inputData_partition_filter_hive=${falcon_inputData_partition_filter_hive}</param> + </hive> + + <ok to="end"/> + <error to="failHive"/> + </action> + + <kill name="failMapRed"> + <message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> + </kill> + <kill name="failHive"> + <message>Hive action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> + </kill> + <kill name="failPig"> + <message>Pig action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> + </kill> + <end name="end"/> +</workflow-app> +-
