Repository: spark
Updated Branches:
  refs/heads/branch-1.0 d8f1b33f4 -> 8cfebf5bf
http://git-wip-us.apache.org/repos/asf/spark/blob/8cfebf5b/python/examples/sort.py
----------------------------------------------------------------------
diff --git a/python/examples/sort.py b/python/examples/sort.py
deleted file mode 100755
index 5de20a6..0000000
--- a/python/examples/sort.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import sys
-
-from pyspark import SparkContext
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 3:
-        print >> sys.stderr, "Usage: sort <master> <file>"
-        exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonSort")
-    lines = sc.textFile(sys.argv[2], 1)
-    sortedCount = lines.flatMap(lambda x: x.split(' ')) \
-        .map(lambda x: (int(x), 1)) \
-        .sortByKey(lambda x: x)
-    # This is just a demo on how to bring all the sorted data back to a single node.
-    # In reality, we wouldn't want to collect all the data to the driver node.
-    output = sortedCount.collect()
-    for (num, unitcount) in output:
-        print num
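A side note on the deleted sort.py: sortByKey(lambda x: x) sorts ascending only by
accident, since the first positional parameter of RDD.sortByKey is the "ascending"
flag and a lambda is simply truthy. A minimal sketch of the same job in Python 3
style, assuming a local[*] master string purely for illustration (not part of this
commit):

    import sys

    from pyspark import SparkContext

    if __name__ == "__main__":
        if len(sys.argv) != 2:
            print("Usage: sort <file>", file=sys.stderr)
            exit(-1)
        sc = SparkContext("local[*]", "PythonSort")  # assumed master, illustration only
        lines = sc.textFile(sys.argv[1], 1)
        sortedCount = lines.flatMap(lambda x: x.split(' ')) \
            .map(lambda x: (int(x), 1)) \
            .sortByKey()  # ascending=True by default; no key function needed here
        # Collecting everything back to the driver is for demonstration only.
        for (num, unitcount) in sortedCount.collect():
            print(num)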
http://git-wip-us.apache.org/repos/asf/spark/blob/8cfebf5b/python/examples/transitive_closure.py
----------------------------------------------------------------------
diff --git a/python/examples/transitive_closure.py b/python/examples/transitive_closure.py
deleted file mode 100755
index 744cce6..0000000
--- a/python/examples/transitive_closure.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import sys
-from random import Random
-
-from pyspark import SparkContext
-
-numEdges = 200
-numVertices = 100
-rand = Random(42)
-
-
-def generateGraph():
-    edges = set()
-    while len(edges) < numEdges:
-        src = rand.randrange(0, numEdges)
-        dst = rand.randrange(0, numEdges)
-        if src != dst:
-            edges.add((src, dst))
-    return edges
-
-
-if __name__ == "__main__":
-    if len(sys.argv) == 1:
-        print >> sys.stderr, "Usage: transitive_closure <master> [<slices>]"
-        exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonTransitiveClosure")
-    slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
-    tc = sc.parallelize(generateGraph(), slices).cache()
-
-    # Linear transitive closure: each round grows paths by one edge,
-    # by joining the graph's edges with the already-discovered paths.
-    # e.g. join the path (y, z) from the TC with the edge (x, y) from
-    # the graph to obtain the path (x, z).
-
-    # Because join() joins on keys, the edges are stored in reversed order.
-    edges = tc.map(lambda (x, y): (y, x))
-
-    oldCount = 0L
-    nextCount = tc.count()
-    while True:
-        oldCount = nextCount
-        # Perform the join, obtaining an RDD of (y, (z, x)) pairs,
-        # then project the result to obtain the new (x, z) paths.
-        new_edges = tc.join(edges).map(lambda (_, (a, b)): (b, a))
-        tc = tc.union(new_edges).distinct().cache()
-        nextCount = tc.count()
-        if nextCount == oldCount:
-            break
-
-    print "TC has %i edges" % tc.count()
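Two Python-2-only constructs in the deleted transitive_closure.py are worth
flagging: tuple parameter unpacking in lambdas (lambda (x, y): ...) and the 0L
long literal, neither of which parses under Python 3. Note also that
generateGraph draws vertex ids with rand.randrange(0, numEdges) rather than
rand.randrange(0, numVertices), so the graph appears to span up to 200 vertices
despite numVertices = 100. A minimal Python 3 sketch of the join loop, assuming
tc is the cached edge RDD built as in the deleted file (illustration only, not
part of this commit):

    # tc holds (x, y) paths; key edges by destination so join() matches path heads.
    edges = tc.map(lambda e: (e[1], e[0]))   # (y, x)

    oldCount = 0        # plain int; Python 3 has no 0L literal
    nextCount = tc.count()
    while True:
        oldCount = nextCount
        # Joining (y, z) with (y, x) yields (y, (z, x)); project to the new (x, z) path.
        new_edges = tc.join(edges).map(lambda p: (p[1][1], p[1][0]))
        tc = tc.union(new_edges).distinct().cache()
        nextCount = tc.count()
        if nextCount == oldCount:
            break

    print("TC has %i edges" % tc.count())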
http://git-wip-us.apache.org/repos/asf/spark/blob/8cfebf5b/python/examples/wordcount.py
----------------------------------------------------------------------
diff --git a/python/examples/wordcount.py b/python/examples/wordcount.py
deleted file mode 100755
index b9139b9..0000000
--- a/python/examples/wordcount.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import sys
-from operator import add
-
-from pyspark import SparkContext
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 3:
-        print >> sys.stderr, "Usage: wordcount <master> <file>"
-        exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonWordCount")
-    lines = sc.textFile(sys.argv[2], 1)
-    counts = lines.flatMap(lambda x: x.split(' ')) \
-        .map(lambda x: (x, 1)) \
-        .reduceByKey(add)
-    output = counts.collect()
-    for (word, count) in output:
-        print "%s: %i" % (word, count)

http://git-wip-us.apache.org/repos/asf/spark/blob/8cfebf5b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
----------------------------------------------------------------------
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
index bbf57ef..a73d6f3 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
@@ -25,7 +25,7 @@ import scala.collection.JavaConversions.mapAsScalaMap
 private[streaming]
 object RawTextHelper {
 
-  /** 
+  /**
    * Splits lines and counts the words.
    */
   def splitAndCountPartitions(iter: Iterator[String]): Iterator[(String, Long)] = {
@@ -114,4 +114,3 @@ object RawTextHelper {
 
   def max(v1: Long, v2: Long) = math.max(v1, v2)
 }
-
