Repository: oozie Updated Branches: refs/heads/master 3d1fe2877 -> ae11fe7a9
OOZIE-3321 PySpark example fails (daniel.becker via andras.piros) Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/ae11fe7a Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/ae11fe7a Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/ae11fe7a Branch: refs/heads/master Commit: ae11fe7a9f07bed71faaec3a91232514c67f8b0f Parents: 3d1fe28 Author: Andras Piros <andras.pi...@cloudera.com> Authored: Wed Aug 8 11:17:41 2018 +0200 Committer: Andras Piros <andras.pi...@cloudera.com> Committed: Wed Aug 8 11:17:41 2018 +0200 ---------------------------------------------------------------------- examples/src/main/apps/pyspark/lib/pi.py | 41 ------------------- release-log.txt | 1 + sharelib/pom.xml | 15 +++++++ sharelib/spark/src/main/resources/pi.py | 41 +++++++++++++++++++ .../spark/src/main/resources/py4j-0.9-src.zip | Bin 0 -> 44846 bytes sharelib/spark/src/main/resources/pyspark.zip | Bin 0 -> 357051 bytes .../apache/oozie/action/hadoop/TestPyspark.java | 6 --- sharelib/spark/src/test/resources/pi.py | 41 ------------------- .../spark/src/test/resources/py4j-0.9-src.zip | Bin 44846 -> 0 bytes sharelib/spark/src/test/resources/pyspark.zip | Bin 357051 -> 0 bytes src/main/assemblies/examples.xml | 4 ++ 11 files changed, 61 insertions(+), 88 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/examples/src/main/apps/pyspark/lib/pi.py ---------------------------------------------------------------------- diff --git a/examples/src/main/apps/pyspark/lib/pi.py b/examples/src/main/apps/pyspark/lib/pi.py deleted file mode 100644 index a74dc93..0000000 --- a/examples/src/main/apps/pyspark/lib/pi.py +++ /dev/null @@ -1,41 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import sys -from random import random -from operator import add - -from pyspark import SparkContext - - -if __name__ == "__main__": - """ - Usage: pi [partitions] - """ - sc = SparkContext(appName="Python-Spark-Pi") - partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2 - n = 100000 * partitions - - def f(_): - x = random() * 2 - 1 - y = random() * 2 - 1 - return 1 if x ** 2 + y ** 2 < 1 else 0 - - count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add) - print("Pi is roughly %f" % (4.0 * count / n)) - - sc.stop() \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index 1979e6d..5ed9f79 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 5.1.0 release (trunk - unreleased) +OOZIE-3321 PySpark example fails (daniel.becker via andras.piros) OOZIE-3315 DateList example fails (daniel.becker via andras.piros) OOZIE-3313 Hive example action fails (daniel.becker via gezapeti) OOZIE-3193 Applications are not killed when submitted via subworkflow (kmarton via gezapeti, andras.piros) http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/pom.xml ---------------------------------------------------------------------- diff --git a/sharelib/pom.xml b/sharelib/pom.xml index 6a0864d..39cea25 100644 --- a/sharelib/pom.xml +++ b/sharelib/pom.xml @@ -61,6 +61,21 @@ <goal>resources</goal> </goals> </execution> + <execution> + <id>copy-resources</id> + <phase>generate-test-resources</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${basedir}/spark/target/test-classes</outputDirectory> + <resources> + <resource> + <directory>spark/src/main/resources</directory> + </resource> + </resources> + </configuration> + </execution> </executions> </plugin> <plugin> http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/main/resources/pi.py ---------------------------------------------------------------------- diff --git a/sharelib/spark/src/main/resources/pi.py b/sharelib/spark/src/main/resources/pi.py new file mode 100644 index 0000000..e9836b2 --- /dev/null +++ b/sharelib/spark/src/main/resources/pi.py @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +from random import random +from operator import add + +from pyspark import SparkContext + + +if __name__ == "__main__": + """ + Usage: pi [partitions] + """ + sc = SparkContext(appName="PythonPi") + partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2 + n = 100000 * partitions + + def f(_): + x = random() * 2 - 1 + y = random() * 2 - 1 + return 1 if x ** 2 + y ** 2 < 1 else 0 + + count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add) + print("Pi is roughly %f" % (4.0 * count / n)) + + sc.stop() \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/main/resources/py4j-0.9-src.zip ---------------------------------------------------------------------- diff --git a/sharelib/spark/src/main/resources/py4j-0.9-src.zip b/sharelib/spark/src/main/resources/py4j-0.9-src.zip new file mode 100644 index 0000000..dace2d0 Binary files /dev/null and b/sharelib/spark/src/main/resources/py4j-0.9-src.zip differ http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/main/resources/pyspark.zip ---------------------------------------------------------------------- diff --git a/sharelib/spark/src/main/resources/pyspark.zip b/sharelib/spark/src/main/resources/pyspark.zip new file mode 100644 index 0000000..9ff8bd8 Binary files /dev/null and b/sharelib/spark/src/main/resources/pyspark.zip differ http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java ---------------------------------------------------------------------- diff --git a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java index 9d8d4aa..f83bbfe 100644 --- a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java +++ b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java @@ -24,15 +24,9 @@ import java.util.ArrayList; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.JobClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobID; -import org.apache.hadoop.mapred.RunningJob; import org.apache.oozie.WorkflowActionBean; import org.apache.oozie.WorkflowJobBean; import org.apache.oozie.client.WorkflowAction; -import org.apache.oozie.service.HadoopAccessorService; -import org.apache.oozie.service.Services; import org.apache.oozie.service.WorkflowAppService; import org.apache.oozie.util.IOUtils; import org.apache.oozie.util.XConfiguration; http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/test/resources/pi.py ---------------------------------------------------------------------- diff --git a/sharelib/spark/src/test/resources/pi.py b/sharelib/spark/src/test/resources/pi.py deleted file mode 100644 index e9836b2..0000000 --- a/sharelib/spark/src/test/resources/pi.py +++ /dev/null @@ -1,41 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import sys -from random import random -from operator import add - -from pyspark import SparkContext - - -if __name__ == "__main__": - """ - Usage: pi [partitions] - """ - sc = SparkContext(appName="PythonPi") - partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2 - n = 100000 * partitions - - def f(_): - x = random() * 2 - 1 - y = random() * 2 - 1 - return 1 if x ** 2 + y ** 2 < 1 else 0 - - count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add) - print("Pi is roughly %f" % (4.0 * count / n)) - - sc.stop() \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/test/resources/py4j-0.9-src.zip ---------------------------------------------------------------------- diff --git a/sharelib/spark/src/test/resources/py4j-0.9-src.zip b/sharelib/spark/src/test/resources/py4j-0.9-src.zip deleted file mode 100644 index dace2d0..0000000 Binary files a/sharelib/spark/src/test/resources/py4j-0.9-src.zip and /dev/null differ http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/test/resources/pyspark.zip ---------------------------------------------------------------------- diff --git a/sharelib/spark/src/test/resources/pyspark.zip b/sharelib/spark/src/test/resources/pyspark.zip deleted file mode 100644 index 9ff8bd8..0000000 Binary files a/sharelib/spark/src/test/resources/pyspark.zip and /dev/null differ http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/src/main/assemblies/examples.xml ---------------------------------------------------------------------- diff --git a/src/main/assemblies/examples.xml b/src/main/assemblies/examples.xml index ee485dc..c365ccd 100644 --- a/src/main/assemblies/examples.xml +++ b/src/main/assemblies/examples.xml @@ -31,6 +31,10 @@ <directory>${basedir}/src/main/apps</directory> <outputDirectory>/examples/apps</outputDirectory> </fileSet> + <fileSet> + <directory>${basedir}/../sharelib/spark/src/main/resources</directory> + <outputDirectory>/examples/apps/pyspark/lib</outputDirectory> + </fileSet> </fileSets> <files>