Repository: spark
Updated Branches:
  refs/heads/branch-1.0 d8f1b33f4 -> 8cfebf5bf
http://git-wip-us.apache.org/repos/asf/spark/blob/8cfebf5b/python/examples/sort.py
----------------------------------------------------------------------
diff --git a/python/examples/sort.py b/python/examples/sort.py
deleted file mode 100755
index 5de20a6..0000000
--- a/python/examples/sort.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import sys
-
-from pyspark import SparkContext
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 3:
-        print >> sys.stderr, "Usage: sort <master> <file>"
-        exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonSort")
-    lines = sc.textFile(sys.argv[2], 1)
-    sortedCount = lines.flatMap(lambda x: x.split(' ')) \
-        .map(lambda x: (int(x), 1)) \
-        .sortByKey(lambda x: x)
-    # This is just a demo on how to bring all the sorted data back to a single node.
-    # In reality, we wouldn't want to collect all the data to the driver node.
-    output = sortedCount.collect()
-    for (num, unitcount) in output:
-        print num
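A side note on the deleted sort.py: sortByKey(lambda x: x) sorts ascending only by
accident, since the first positional parameter of RDD.sortByKey is the "ascending"
flag and a lambda is simply truthy. A minimal sketch of the same job in Python 3
style, assuming a local[*] master string purely for illustration (not part of this
commit):

    import sys

    from pyspark import SparkContext

    if __name__ == "__main__":
        if len(sys.argv) != 2:
            print("Usage: sort <file>", file=sys.stderr)
            exit(-1)
        sc = SparkContext("local[*]", "PythonSort")  # assumed master, illustration only
        lines = sc.textFile(sys.argv[1], 1)
        sortedCount = lines.flatMap(lambda x: x.split(' ')) \
            .map(lambda x: (int(x), 1)) \
            .sortByKey()  # ascending=True by default; no key function needed here
        # Collecting everything back to the driver is for demonstration only.
        for (num, unitcount) in sortedCount.collect():
            print(num)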
http://git-wip-us.apache.org/repos/asf/spark/blob/8cfebf5b/python/examples/transitive_closure.py
----------------------------------------------------------------------
diff --git a/python/examples/transitive_closure.py b/python/examples/transitive_closure.py
deleted file mode 100755
index 744cce6..0000000
--- a/python/examples/transitive_closure.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import sys
-from random import Random
-
-from pyspark import SparkContext
-
-numEdges = 200
-numVertices = 100
-rand = Random(42)
-
-
-def generateGraph():
-    edges = set()
-    while len(edges) < numEdges:
-        src = rand.randrange(0, numEdges)
-        dst = rand.randrange(0, numEdges)
-        if src != dst:
-            edges.add((src, dst))
-    return edges
-
-
-if __name__ == "__main__":
-    if len(sys.argv) == 1:
-        print >> sys.stderr, "Usage: transitive_closure <master> [<slices>]"
-        exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonTransitiveClosure")
-    slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
-    tc = sc.parallelize(generateGraph(), slices).cache()
-
-    # Linear transitive closure: each round grows paths by one edge,
-    # by joining the graph's edges with the already-discovered paths.
-    # e.g. join the path (y, z) from the TC with the edge (x, y) from
-    # the graph to obtain the path (x, z).
-
-    # Because join() joins on keys, the edges are stored in reversed order.
-    edges = tc.map(lambda (x, y): (y, x))
-
-    oldCount = 0L
-    nextCount = tc.count()
-    while True:
-        oldCount = nextCount
-        # Perform the join, obtaining an RDD of (y, (z, x)) pairs,
-        # then project the result to obtain the new (x, z) paths.
-        new_edges = tc.join(edges).map(lambda (_, (a, b)): (b, a))
-        tc = tc.union(new_edges).distinct().cache()
-        nextCount = tc.count()
-        if nextCount == oldCount:
-            break
-
-    print "TC has %i edges" % tc.count()
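Two Python-2-only constructs in the deleted transitive_closure.py are worth
flagging: tuple parameter unpacking in lambdas (lambda (x, y): ...) and the 0L
long literal, neither of which parses under Python 3. Note also that
generateGraph draws vertex ids with rand.randrange(0, numEdges) rather than
rand.randrange(0, numVertices), so the graph appears to span up to 200 vertices
despite numVertices = 100. A minimal Python 3 sketch of the join loop, assuming
tc is the cached edge RDD built as in the deleted file (illustration only, not
part of this commit):

    # tc holds (x, y) paths; key edges by destination so join() matches path heads.
    edges = tc.map(lambda e: (e[1], e[0]))   # (y, x)

    oldCount = 0        # plain int; Python 3 has no 0L literal
    nextCount = tc.count()
    while True:
        oldCount = nextCount
        # Joining (y, z) with (y, x) yields (y, (z, x)); project to the new (x, z) path.
        new_edges = tc.join(edges).map(lambda p: (p[1][1], p[1][0]))
        tc = tc.union(new_edges).distinct().cache()
        nextCount = tc.count()
        if nextCount == oldCount:
            break

    print("TC has %i edges" % tc.count())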
http://git-wip-us.apache.org/repos/asf/spark/blob/8cfebf5b/python/examples/wordcount.py
----------------------------------------------------------------------
diff --git a/python/examples/wordcount.py b/python/examples/wordcount.py
deleted file mode 100755
index b9139b9..0000000
--- a/python/examples/wordcount.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import sys
-from operator import add
-
-from pyspark import SparkContext
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 3:
-        print >> sys.stderr, "Usage: wordcount <master> <file>"
-        exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonWordCount")
-    lines = sc.textFile(sys.argv[2], 1)
-    counts = lines.flatMap(lambda x: x.split(' ')) \
-        .map(lambda x: (x, 1)) \
-        .reduceByKey(add)
-    output = counts.collect()
-    for (word, count) in output:
-        print "%s: %i" % (word, count)

http://git-wip-us.apache.org/repos/asf/spark/blob/8cfebf5b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
----------------------------------------------------------------------
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
index bbf57ef..a73d6f3 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
@@ -25,7 +25,7 @@ import scala.collection.JavaConversions.mapAsScalaMap
 private[streaming]
 object RawTextHelper {
 
-  /** 
+  /**
    * Splits lines and counts the words.
    */
   def splitAndCountPartitions(iter: Iterator[String]): Iterator[(String, Long)] = {
@@ -114,4 +114,3 @@ object RawTextHelper {
 
   def max(v1: Long, v2: Long) = math.max(v1, v2)
 }
-
