Repository: spark Updated Branches: refs/heads/master 37bf76a2d -> 41afa1650
[SPARK-8652] [PYSPARK] Check return value for all uses of doctest.testmod()

This patch addresses a critical issue in the PySpark tests:

Several of our Python modules' `__main__` methods call `doctest.testmod()` in order to run doctests but forget to check and handle its return value. As a result, some PySpark test failures can go unnoticed because they will not fail the build.

Fortunately, there was only one test failure which was masked by this bug: a `pyspark.profiler` doctest was failing due to changes in RDD pipelining.

Author: Josh Rosen <joshro...@databricks.com>

Closes #7032 from JoshRosen/testmod-fix and squashes the following commits:

60dbdc0 [Josh Rosen] Account for int vs. long formatting change in Python 3
8b8d80a [Josh Rosen] Fix failing test.
e6423f9 [Josh Rosen] Check return code for all uses of doctest.testmod().

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41afa165
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41afa165
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41afa165

Branch: refs/heads/master
Commit: 41afa16500e682475eaa80e31c0434b7ab66abcb
Parents: 37bf76a
Author: Josh Rosen <joshro...@databricks.com>
Authored: Fri Jun 26 08:12:22 2015 -0700
Committer: Davies Liu <dav...@databricks.com>
Committed: Fri Jun 26 08:12:22 2015 -0700

----------------------------------------------------------------------
 dev/merge_spark_pr.py            | 4 +++-
 python/pyspark/accumulators.py   | 4 +++-
 python/pyspark/broadcast.py      | 4 +++-
 python/pyspark/heapq3.py         | 5 +++--
 python/pyspark/profiler.py       | 8 ++++++--
 python/pyspark/serializers.py    | 8 +++++---
 python/pyspark/shuffle.py        | 4 +++-
 python/pyspark/streaming/util.py | 4 +++-
 8 files changed, 29 insertions(+), 12 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/dev/merge_spark_pr.py
---------------------------------------------------------------------- diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index cd83b35..cf827ce 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -431,6 +431,8 @@ def main(): if __name__ == "__main__": import doctest - doctest.testmod() + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) main() http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/accumulators.py ---------------------------------------------------------------------- diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index adca90d..6ef8cf5 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -264,4 +264,6 @@ def _start_update_server(): if __name__ == "__main__": import doctest - doctest.testmod() + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/broadcast.py ---------------------------------------------------------------------- diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index 3de4615..663c9ab 100644 --- a/python/pyspark/broadcast.py +++ b/python/pyspark/broadcast.py @@ -115,4 +115,6 @@ class Broadcast(object): if __name__ == "__main__": import doctest - doctest.testmod() + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/heapq3.py ---------------------------------------------------------------------- diff --git a/python/pyspark/heapq3.py b/python/pyspark/heapq3.py index 4ef2afe..b27e91a 100644 --- a/python/pyspark/heapq3.py +++ b/python/pyspark/heapq3.py @@ -883,6 +883,7 @@ except ImportError: if __name__ == "__main__": - import doctest - print(doctest.testmod()) + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) 
http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/profiler.py ---------------------------------------------------------------------- diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py index d18daaa..44d17bd 100644 --- a/python/pyspark/profiler.py +++ b/python/pyspark/profiler.py @@ -90,9 +90,11 @@ class Profiler(object): >>> sc = SparkContext('local', 'test', conf=conf, profiler_cls=MyCustomProfiler) >>> sc.parallelize(range(1000)).map(lambda x: 2 * x).take(10) [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] + >>> sc.parallelize(range(1000)).count() + 1000 >>> sc.show_profiles() My custom profiles for RDD:1 - My custom profiles for RDD:2 + My custom profiles for RDD:3 >>> sc.stop() """ @@ -169,4 +171,6 @@ class BasicProfiler(Profiler): if __name__ == "__main__": import doctest - doctest.testmod() + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/serializers.py ---------------------------------------------------------------------- diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 7f9d0a3..411b4db 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -44,8 +44,8 @@ which contains two batches of two objects: >>> rdd.glom().collect() [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]] ->>> rdd._jrdd.count() -8L +>>> int(rdd._jrdd.count()) +8 >>> sc.stop() """ @@ -556,4 +556,6 @@ def write_with_length(obj, stream): if __name__ == '__main__': import doctest - doctest.testmod() + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/shuffle.py ---------------------------------------------------------------------- diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index 67752c0..8fb71ba 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ 
-838,4 +838,6 @@ class ExternalGroupBy(ExternalMerger): if __name__ == "__main__": import doctest - doctest.testmod() + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/streaming/util.py ---------------------------------------------------------------------- diff --git a/python/pyspark/streaming/util.py b/python/pyspark/streaming/util.py index 34291f3..a9bfec2 100644 --- a/python/pyspark/streaming/util.py +++ b/python/pyspark/streaming/util.py @@ -125,4 +125,6 @@ def rddToFileName(prefix, suffix, timestamp): if __name__ == "__main__": import doctest - doctest.testmod() + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org