Repository: oozie
Updated Branches:
  refs/heads/master e62ffc39e -> c8748d221
OOZIE-2540 Create a PySpark example (abhishekbafna via rkanter)

Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/c8748d22
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/c8748d22
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/c8748d22

Branch: refs/heads/master
Commit: c8748d221f8265c16e37383e8b3f1572bb613083
Parents: e62ffc3
Author: Robert Kanter <rkan...@apache.org>
Authored: Thu Mar 9 16:16:24 2017 -0800
Committer: Robert Kanter <rkan...@apache.org>
Committed: Thu Mar 9 16:16:24 2017 -0800

----------------------------------------------------------------------
 examples/src/main/apps/pyspark/job.properties | 26 ++++++++++++++
 examples/src/main/apps/pyspark/lib/pi.py | 41 ++++++++++++++++++++++
 examples/src/main/apps/pyspark/workflow.xml | 41 ++++++++++++++++++++++
 release-log.txt | 1 +
 4 files changed, 109 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/job.properties
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/job.properties b/examples/src/main/apps/pyspark/job.properties
new file mode 100644
index 0000000..72e09b9
--- /dev/null
+++ b/examples/src/main/apps/pyspark/job.properties
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+nameNode=hdfs://localhost:8020
+jobTracker=localhost:8021
+queueName=default
+examplesRoot=examples
+oozie.use.system.libpath=true
+oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/pyspark
+master=yarn-client
+

http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/lib/pi.py
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/lib/pi.py b/examples/src/main/apps/pyspark/lib/pi.py
new file mode 100644
index 0000000..a74dc93
--- /dev/null
+++ b/examples/src/main/apps/pyspark/lib/pi.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+from random import random
+from operator import add
+
+from pyspark import SparkContext
+
+
+if __name__ == "__main__":
+    """
+    Usage: pi [partitions]
+    """
+    sc = SparkContext(appName="Python-Spark-Pi")
+    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
+    n = 100000 * partitions
+
+    def f(_):
+        x = random() * 2 - 1
+        y = random() * 2 - 1
+        return 1 if x ** 2 + y ** 2 < 1 else 0
+
+    count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
+    print("Pi is roughly %f" % (4.0 * count / n))
+
+    sc.stop()
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/workflow.xml
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/workflow.xml b/examples/src/main/apps/pyspark/workflow.xml
new file mode 100644
index 0000000..4768a6c
--- /dev/null
+++ b/examples/src/main/apps/pyspark/workflow.xml
@@ -0,0 +1,41 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<workflow-app xmlns='uri:oozie:workflow:0.5' name='SparkPythonPi'>
+
+    <start to='spark-node' />
+
+    <action name='spark-node'>
+        <spark xmlns="uri:oozie:spark-action:0.1">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <master>${master}</master>
+            <name>Python-Spark-Pi</name>
+            <jar>pi.py</jar>
+        </spark>
+        <ok to="end" />
+        <error to="fail" />
+    </action>
+
+    <kill name="fail">
+        <message>Workflow failed, error message [${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <end name='end' />
+
+</workflow-app>

http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 73cd8fa..5c55cad 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
 -- Oozie 4.4.0 release (trunk - unreleased)
 
+OOZIE-2540 Create a PySpark example (abhishekbafna via rkanter)
 OOZIE-807 Docs can be explicit about multiple sub-workflow definitions being possible (qwertymaniac via rkanter)
 OOZIE-2811 Add support for filtering out properties from SparkConfigurationService (gezapeti via rkanter)
 OOZIE-2802 Spark action failure on Spark 2.1.0 due to duplicate sharelibs (gezapeti via rkanter)