Repository: spark-website
Updated Branches:
  refs/heads/asf-site ae58782ba -> 879303593


Update the Java examples to use Java 8 lambdas; make the Scala/Python pi examples
consistent with the better Java version; apply minor syntax fixes to these examples


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/87930359
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/87930359
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/87930359

Branch: refs/heads/asf-site
Commit: 879303593efa229d416eb4178913c1c1a6f7033c
Parents: ae58782
Author: Sean Owen <[email protected]>
Authored: Sun Feb 19 08:28:48 2017 -0800
Committer: Sean Owen <[email protected]>
Committed: Sun Feb 19 08:28:48 2017 -0800

----------------------------------------------------------------------
 examples.md        | 57 +++++++++++++++++++------------------------------
 site/examples.html | 57 +++++++++++++++++++------------------------------
 2 files changed, 44 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark-website/blob/87930359/examples.md
----------------------------------------------------------------------
diff --git a/examples.md b/examples.md
index 7f13e41..4a87331 100644
--- a/examples.md
+++ b/examples.md
@@ -61,15 +61,9 @@ counts.saveAsTextFile("hdfs://...")
 <div class="code code-tab">
 {% highlight java %}
 JavaRDD<String> textFile = sc.textFile("hdfs://...");
-JavaRDD<String> words = textFile.flatMap(new FlatMapFunction<String, String>() {
-  public Iterator<String> call(String s) { return Arrays.asList(s.split(" ")).iterator(); }
-});
-JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
-  public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); }
-});
-JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
-  public Integer call(Integer a, Integer b) { return a + b; }
-});
+JavaRDD<String> words = textFile.flatMap(s -> Arrays.asList(s.split(" ")).iterator())
+                            .mapToPair(word -> new Tuple2<>(word, 1))
+                            .reduceByKey((a, b) -> a + b);
 counts.saveAsTextFile("hdfs://...");
 {% endhighlight %}
 </div>
@@ -89,12 +83,12 @@ counts.saveAsTextFile("hdfs://...");
 <div class="tab-pane tab-pane-python active">
 <div class="code code-tab">
 {% highlight python %}
-def sample(p):
-    x, y = random(), random()
-    return 1 if x*x + y*y < 1 else 0
+def inside(p):
+    x, y = random.random(), random.random()
+    return x*x + y*y < 1
 
-count = sc.parallelize(xrange(0, NUM_SAMPLES)).map(sample) \
-             .reduce(lambda a, b: a + b)
+count = sc.parallelize(xrange(0, NUM_SAMPLES)) \
+             .filter(inside).count()
 print "Pi is roughly %f" % (4.0 * count / NUM_SAMPLES)
 {% endhighlight %}
 </div>
@@ -103,12 +97,12 @@ print "Pi is roughly %f" % (4.0 * count / NUM_SAMPLES)
 <div class="tab-pane tab-pane-scala">
 <div class="code code-tab">
 {% highlight scala %}
-val count = sc.parallelize(1 to NUM_SAMPLES).map{i =>
-  val x = Math.random()
-  val y = Math.random()
-  if (x*x + y*y < 1) 1 else 0
-}.reduce(_ + _)
-println("Pi is roughly " + 4.0 * count / NUM_SAMPLES)
+val count = sc.parallelize(1 to NUM_SAMPLES).filter { _ =>
+  val x = math.random
+  val y = math.random
+  x*x + y*y < 1
+}.count()
+println(s"Pi is roughly ${4.0 * count / NUM_SAMPLES}")
 {% endhighlight %}
 </div>
 </div>
@@ -116,17 +110,15 @@ println("Pi is roughly " + 4.0 * count / NUM_SAMPLES)
 <div class="tab-pane tab-pane-java">
 <div class="code code-tab">
 {% highlight java %}
-List<Integer> l = new ArrayList<Integer>(NUM_SAMPLES);
+List<Integer> l = new ArrayList<>(NUM_SAMPLES);
 for (int i = 0; i < NUM_SAMPLES; i++) {
   l.add(i);
 }
 
-long count = sc.parallelize(l).filter(new Function<Integer, Boolean>() {
-  public Boolean call(Integer i) {
-    double x = Math.random();
-    double y = Math.random();
-    return x*x + y*y < 1;
-  }
+long count = sc.parallelize(l).filter(i -> {
+  double x = Math.random();
+  double y = Math.random();
+  return x*x + y*y < 1;
 }).count();
 System.out.println("Pi is roughly " + 4.0 * count / NUM_SAMPLES);
 {% endhighlight %}
@@ -194,14 +186,9 @@ errors.filter(col("line").like("%MySQL%")).collect()
 {% highlight java %}
 // Creates a DataFrame having a single column named "line"
 JavaRDD<String> textFile = sc.textFile("hdfs://...");
-JavaRDD<Row> rowRDD = textFile.map(
-  new Function<String, Row>() {
-    public Row call(String line) throws Exception {
-      return RowFactory.create(line);
-    }
-  });
-List<StructField> fields = new ArrayList<StructField>();
-fields.add(DataTypes.createStructField("line", DataTypes.StringType, true));
+JavaRDD<Row> rowRDD = textFile.map(RowFactory::create);
+List<StructField> fields = Arrays.asList(
+  DataTypes.createStructField("line", DataTypes.StringType, true));
 StructType schema = DataTypes.createStructType(fields);
 DataFrame df = sqlContext.createDataFrame(rowRDD, schema);
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/87930359/site/examples.html
----------------------------------------------------------------------
diff --git a/site/examples.html b/site/examples.html
index bfff52d..05ec479 100644
--- a/site/examples.html
+++ b/site/examples.html
@@ -247,15 +247,9 @@ In this page, we will show examples using RDD API as well as examples using high
 <div class="code code-tab">
 
 <figure class="highlight"><pre><code class="language-java" 
data-lang="java"><span></span><span class="n">JavaRDD</span><span 
class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> 
<span class="n">textFile</span> <span class="o">=</span> <span 
class="n">sc</span><span class="o">.</span><span 
class="na">textFile</span><span class="o">(</span><span 
class="s">&quot;hdfs://...&quot;</span><span class="o">);</span>
-<span class="n">JavaRDD</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">&gt;</span> <span class="n">words</span> 
<span class="o">=</span> <span class="n">textFile</span><span 
class="o">.</span><span class="na">flatMap</span><span class="o">(</span><span 
class="k">new</span> <span class="n">FlatMapFunction</span><span 
class="o">&lt;</span><span class="n">String</span><span class="o">,</span> 
<span class="n">String</span><span class="o">&gt;()</span> <span 
class="o">{</span>
-  <span class="kd">public</span> <span class="n">Iterator</span><span 
class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> 
<span class="nf">call</span><span class="o">(</span><span 
class="n">String</span> <span class="n">s</span><span class="o">)</span> <span 
class="o">{</span> <span class="k">return</span> <span 
class="n">Arrays</span><span class="o">.</span><span 
class="na">asList</span><span class="o">(</span><span class="n">s</span><span 
class="o">.</span><span class="na">split</span><span class="o">(</span><span 
class="s">&quot; &quot;</span><span class="o">)).</span><span 
class="na">iterator</span><span class="o">();</span> <span class="o">}</span>
-<span class="o">});</span>
-<span class="n">JavaPairRDD</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;</span> <span 
class="n">pairs</span> <span class="o">=</span> <span 
class="n">words</span><span class="o">.</span><span 
class="na">mapToPair</span><span class="o">(</span><span class="k">new</span> 
<span class="n">PairFunction</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">String</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;()</span> <span class="o">{</span>
-  <span class="kd">public</span> <span class="n">Tuple2</span><span 
class="o">&lt;</span><span class="n">String</span><span class="o">,</span> 
<span class="n">Integer</span><span class="o">&gt;</span> <span 
class="nf">call</span><span class="o">(</span><span class="n">String</span> 
<span class="n">s</span><span class="o">)</span> <span class="o">{</span> <span 
class="k">return</span> <span class="k">new</span> <span 
class="n">Tuple2</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;(</span><span 
class="n">s</span><span class="o">,</span> <span class="mi">1</span><span 
class="o">);</span> <span class="o">}</span>
-<span class="o">});</span>
-<span class="n">JavaPairRDD</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;</span> <span 
class="n">counts</span> <span class="o">=</span> <span 
class="n">pairs</span><span class="o">.</span><span 
class="na">reduceByKey</span><span class="o">(</span><span class="k">new</span> 
<span class="n">Function2</span><span class="o">&lt;</span><span 
class="n">Integer</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;()</span> <span class="o">{</span>
-  <span class="kd">public</span> <span class="n">Integer</span> <span 
class="nf">call</span><span class="o">(</span><span class="n">Integer</span> 
<span class="n">a</span><span class="o">,</span> <span class="n">Integer</span> 
<span class="n">b</span><span class="o">)</span> <span class="o">{</span> <span 
class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span 
class="n">b</span><span class="o">;</span> <span class="o">}</span>
-<span class="o">});</span>
+<span class="n">JavaRDD</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">&gt;</span> <span class="n">words</span> 
<span class="o">=</span> <span class="n">textFile</span><span 
class="o">.</span><span class="na">flatMap</span><span class="o">(</span><span 
class="n">s</span> <span class="o">-&gt;</span> <span 
class="n">Arrays</span><span class="o">.</span><span 
class="na">asList</span><span class="o">(</span><span class="n">s</span><span 
class="o">.</span><span class="na">split</span><span class="o">(</span><span 
class="s">&quot; &quot;</span><span class="o">)).</span><span 
class="na">iterator</span><span class="o">())</span>
+                            <span class="o">.</span><span 
class="na">mapToPair</span><span class="o">(</span><span class="n">word</span> 
<span class="o">-&gt;</span> <span class="k">new</span> <span 
class="n">Tuple2</span><span class="o">&lt;&gt;(</span><span 
class="n">word</span><span class="o">,</span> <span class="mi">1</span><span 
class="o">))</span>
+                            <span class="o">.</span><span 
class="na">reduceByKey</span><span class="o">((</span><span 
class="n">a</span><span class="o">,</span> <span class="n">b</span><span 
class="o">)</span> <span class="o">-&gt;</span> <span class="n">a</span> <span 
class="o">+</span> <span class="n">b</span><span class="o">);</span>
 <span class="n">counts</span><span class="o">.</span><span 
class="na">saveAsTextFile</span><span class="o">(</span><span 
class="s">&quot;hdfs://...&quot;</span><span 
class="o">);</span></code></pre></figure>
 
 </div>
@@ -275,12 +269,12 @@ In this page, we will show examples using RDD API as well as examples using high
 <div class="tab-pane tab-pane-python active">
 <div class="code code-tab">
 
-<figure class="highlight"><pre><code class="language-python" 
data-lang="python"><span></span><span class="k">def</span> <span 
class="nf">sample</span><span class="p">(</span><span class="n">p</span><span 
class="p">):</span>
-    <span class="n">x</span><span class="p">,</span> <span class="n">y</span> 
<span class="o">=</span> <span class="n">random</span><span 
class="p">(),</span> <span class="n">random</span><span class="p">()</span>
-    <span class="k">return</span> <span class="mi">1</span> <span 
class="k">if</span> <span class="n">x</span><span class="o">*</span><span 
class="n">x</span> <span class="o">+</span> <span class="n">y</span><span 
class="o">*</span><span class="n">y</span> <span class="o">&lt;</span> <span 
class="mi">1</span> <span class="k">else</span> <span class="mi">0</span>
+<figure class="highlight"><pre><code class="language-python" 
data-lang="python"><span></span><span class="k">def</span> <span 
class="nf">inside</span><span class="p">(</span><span class="n">p</span><span 
class="p">):</span>
+    <span class="n">x</span><span class="p">,</span> <span class="n">y</span> 
<span class="o">=</span> <span class="n">random</span><span 
class="o">.</span><span class="n">random</span><span class="p">(),</span> <span 
class="n">random</span><span class="o">.</span><span 
class="n">random</span><span class="p">()</span>
+    <span class="k">return</span> <span class="n">x</span><span 
class="o">*</span><span class="n">x</span> <span class="o">+</span> <span 
class="n">y</span><span class="o">*</span><span class="n">y</span> <span 
class="o">&lt;</span> <span class="mi">1</span>
 
-<span class="n">count</span> <span class="o">=</span> <span 
class="n">sc</span><span class="o">.</span><span 
class="n">parallelize</span><span class="p">(</span><span 
class="nb">xrange</span><span class="p">(</span><span class="mi">0</span><span 
class="p">,</span> <span class="n">NUM_SAMPLES</span><span 
class="p">))</span><span class="o">.</span><span class="n">map</span><span 
class="p">(</span><span class="n">sample</span><span class="p">)</span> \
-             <span class="o">.</span><span class="n">reduce</span><span 
class="p">(</span><span class="k">lambda</span> <span class="n">a</span><span 
class="p">,</span> <span class="n">b</span><span class="p">:</span> <span 
class="n">a</span> <span class="o">+</span> <span class="n">b</span><span 
class="p">)</span>
+<span class="n">count</span> <span class="o">=</span> <span 
class="n">sc</span><span class="o">.</span><span 
class="n">parallelize</span><span class="p">(</span><span 
class="nb">xrange</span><span class="p">(</span><span class="mi">0</span><span 
class="p">,</span> <span class="n">NUM_SAMPLES</span><span class="p">))</span> \
+             <span class="o">.</span><span class="n">filter</span><span 
class="p">(</span><span class="n">inside</span><span class="p">)</span><span 
class="o">.</span><span class="n">count</span><span class="p">()</span>
 <span class="k">print</span> <span class="s2">&quot;Pi is roughly </span><span 
class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> 
<span class="p">(</span><span class="mf">4.0</span> <span class="o">*</span> 
<span class="n">count</span> <span class="o">/</span> <span 
class="n">NUM_SAMPLES</span><span class="p">)</span></code></pre></figure>
 
 </div>
@@ -289,12 +283,12 @@ In this page, we will show examples using RDD API as well as examples using high
 <div class="tab-pane tab-pane-scala">
 <div class="code code-tab">
 
-<figure class="highlight"><pre><code class="language-scala" 
data-lang="scala"><span></span><span class="k">val</span> <span 
class="n">count</span> <span class="k">=</span> <span class="n">sc</span><span 
class="o">.</span><span class="n">parallelize</span><span 
class="o">(</span><span class="mi">1</span> <span class="n">to</span> <span 
class="nc">NUM_SAMPLES</span><span class="o">).</span><span 
class="n">map</span><span class="o">{</span><span class="n">i</span> <span 
class="k">=&gt;</span>
-  <span class="k">val</span> <span class="n">x</span> <span class="k">=</span> 
<span class="nc">Math</span><span class="o">.</span><span 
class="n">random</span><span class="o">()</span>
-  <span class="k">val</span> <span class="n">y</span> <span class="k">=</span> 
<span class="nc">Math</span><span class="o">.</span><span 
class="n">random</span><span class="o">()</span>
-  <span class="k">if</span> <span class="o">(</span><span 
class="n">x</span><span class="o">*</span><span class="n">x</span> <span 
class="o">+</span> <span class="n">y</span><span class="o">*</span><span 
class="n">y</span> <span class="o">&lt;</span> <span class="mi">1</span><span 
class="o">)</span> <span class="mi">1</span> <span class="k">else</span> <span 
class="mi">0</span>
-<span class="o">}.</span><span class="n">reduce</span><span 
class="o">(</span><span class="k">_</span> <span class="o">+</span> <span 
class="k">_</span><span class="o">)</span>
-<span class="n">println</span><span class="o">(</span><span class="s">&quot;Pi 
is roughly &quot;</span> <span class="o">+</span> <span class="mf">4.0</span> 
<span class="o">*</span> <span class="n">count</span> <span class="o">/</span> 
<span class="nc">NUM_SAMPLES</span><span 
class="o">)</span></code></pre></figure>
+<figure class="highlight"><pre><code class="language-scala" 
data-lang="scala"><span></span><span class="k">val</span> <span 
class="n">count</span> <span class="k">=</span> <span class="n">sc</span><span 
class="o">.</span><span class="n">parallelize</span><span 
class="o">(</span><span class="mi">1</span> <span class="n">to</span> <span 
class="nc">NUM_SAMPLES</span><span class="o">).</span><span 
class="n">filter</span> <span class="o">{</span> <span class="k">_</span> <span 
class="k">=&gt;</span>
+  <span class="k">val</span> <span class="n">x</span> <span class="k">=</span> 
<span class="n">math</span><span class="o">.</span><span class="n">random</span>
+  <span class="k">val</span> <span class="n">y</span> <span class="k">=</span> 
<span class="n">math</span><span class="o">.</span><span class="n">random</span>
+  <span class="n">x</span><span class="o">*</span><span class="n">x</span> 
<span class="o">+</span> <span class="n">y</span><span class="o">*</span><span 
class="n">y</span> <span class="o">&lt;</span> <span class="mi">1</span>
+<span class="o">}.</span><span class="n">count</span><span class="o">()</span>
+<span class="n">println</span><span class="o">(</span><span 
class="s">s&quot;Pi is roughly </span><span class="si">${</span><span 
class="mf">4.0</span> <span class="o">*</span> <span class="n">count</span> 
<span class="o">/</span> <span class="nc">NUM_SAMPLES</span><span 
class="si">}</span><span class="s">&quot;</span><span 
class="o">)</span></code></pre></figure>
 
 </div>
 </div>
@@ -302,17 +296,15 @@ In this page, we will show examples using RDD API as well as examples using high
 <div class="tab-pane tab-pane-java">
 <div class="code code-tab">
 
-<figure class="highlight"><pre><code class="language-java" 
data-lang="java"><span></span><span class="n">List</span><span 
class="o">&lt;</span><span class="n">Integer</span><span class="o">&gt;</span> 
<span class="n">l</span> <span class="o">=</span> <span class="k">new</span> 
<span class="n">ArrayList</span><span class="o">&lt;</span><span 
class="n">Integer</span><span class="o">&gt;(</span><span 
class="n">NUM_SAMPLES</span><span class="o">);</span>
+<figure class="highlight"><pre><code class="language-java" 
data-lang="java"><span></span><span class="n">List</span><span 
class="o">&lt;</span><span class="n">Integer</span><span class="o">&gt;</span> 
<span class="n">l</span> <span class="o">=</span> <span class="k">new</span> 
<span class="n">ArrayList</span><span class="o">&lt;&gt;(</span><span 
class="n">NUM_SAMPLES</span><span class="o">);</span>
 <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> 
<span class="n">i</span> <span class="o">=</span> <span 
class="mi">0</span><span class="o">;</span> <span class="n">i</span> <span 
class="o">&lt;</span> <span class="n">NUM_SAMPLES</span><span 
class="o">;</span> <span class="n">i</span><span class="o">++)</span> <span 
class="o">{</span>
   <span class="n">l</span><span class="o">.</span><span 
class="na">add</span><span class="o">(</span><span class="n">i</span><span 
class="o">);</span>
 <span class="o">}</span>
 
-<span class="kt">long</span> <span class="n">count</span> <span 
class="o">=</span> <span class="n">sc</span><span class="o">.</span><span 
class="na">parallelize</span><span class="o">(</span><span 
class="n">l</span><span class="o">).</span><span class="na">filter</span><span 
class="o">(</span><span class="k">new</span> <span 
class="n">Function</span><span class="o">&lt;</span><span 
class="n">Integer</span><span class="o">,</span> <span 
class="n">Boolean</span><span class="o">&gt;()</span> <span class="o">{</span>
-  <span class="kd">public</span> <span class="n">Boolean</span> <span 
class="nf">call</span><span class="o">(</span><span class="n">Integer</span> 
<span class="n">i</span><span class="o">)</span> <span class="o">{</span>
-    <span class="kt">double</span> <span class="n">x</span> <span 
class="o">=</span> <span class="n">Math</span><span class="o">.</span><span 
class="na">random</span><span class="o">();</span>
-    <span class="kt">double</span> <span class="n">y</span> <span 
class="o">=</span> <span class="n">Math</span><span class="o">.</span><span 
class="na">random</span><span class="o">();</span>
-    <span class="k">return</span> <span class="n">x</span><span 
class="o">*</span><span class="n">x</span> <span class="o">+</span> <span 
class="n">y</span><span class="o">*</span><span class="n">y</span> <span 
class="o">&lt;</span> <span class="mi">1</span><span class="o">;</span>
-  <span class="o">}</span>
+<span class="kt">long</span> <span class="n">count</span> <span 
class="o">=</span> <span class="n">sc</span><span class="o">.</span><span 
class="na">parallelize</span><span class="o">(</span><span 
class="n">l</span><span class="o">).</span><span class="na">filter</span><span 
class="o">(</span><span class="n">i</span> <span class="o">-&gt;</span> <span 
class="o">{</span>
+  <span class="kt">double</span> <span class="n">x</span> <span 
class="o">=</span> <span class="n">Math</span><span class="o">.</span><span 
class="na">random</span><span class="o">();</span>
+  <span class="kt">double</span> <span class="n">y</span> <span 
class="o">=</span> <span class="n">Math</span><span class="o">.</span><span 
class="na">random</span><span class="o">();</span>
+  <span class="k">return</span> <span class="n">x</span><span 
class="o">*</span><span class="n">x</span> <span class="o">+</span> <span 
class="n">y</span><span class="o">*</span><span class="n">y</span> <span 
class="o">&lt;</span> <span class="mi">1</span><span class="o">;</span>
 <span class="o">}).</span><span class="na">count</span><span 
class="o">();</span>
 <span class="n">System</span><span class="o">.</span><span 
class="na">out</span><span class="o">.</span><span 
class="na">println</span><span class="o">(</span><span class="s">&quot;Pi is 
roughly &quot;</span> <span class="o">+</span> <span class="mf">4.0</span> 
<span class="o">*</span> <span class="n">count</span> <span class="o">/</span> 
<span class="n">NUM_SAMPLES</span><span 
class="o">);</span></code></pre></figure>
 
@@ -380,14 +372,9 @@ Also, programs based on DataFrame API will be automatically optimized by Spark
 
 <figure class="highlight"><pre><code class="language-java" 
data-lang="java"><span></span><span class="c1">// Creates a DataFrame having a 
single column named &quot;line&quot;</span>
 <span class="n">JavaRDD</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">&gt;</span> <span 
class="n">textFile</span> <span class="o">=</span> <span 
class="n">sc</span><span class="o">.</span><span 
class="na">textFile</span><span class="o">(</span><span 
class="s">&quot;hdfs://...&quot;</span><span class="o">);</span>
-<span class="n">JavaRDD</span><span class="o">&lt;</span><span 
class="n">Row</span><span class="o">&gt;</span> <span class="n">rowRDD</span> 
<span class="o">=</span> <span class="n">textFile</span><span 
class="o">.</span><span class="na">map</span><span class="o">(</span>
-  <span class="k">new</span> <span class="n">Function</span><span 
class="o">&lt;</span><span class="n">String</span><span class="o">,</span> 
<span class="n">Row</span><span class="o">&gt;()</span> <span class="o">{</span>
-    <span class="kd">public</span> <span class="n">Row</span> <span 
class="nf">call</span><span class="o">(</span><span class="n">String</span> 
<span class="n">line</span><span class="o">)</span> <span 
class="kd">throws</span> <span class="n">Exception</span> <span 
class="o">{</span>
-      <span class="k">return</span> <span class="n">RowFactory</span><span 
class="o">.</span><span class="na">create</span><span class="o">(</span><span 
class="n">line</span><span class="o">);</span>
-    <span class="o">}</span>
-  <span class="o">});</span>
-<span class="n">List</span><span class="o">&lt;</span><span 
class="n">StructField</span><span class="o">&gt;</span> <span 
class="n">fields</span> <span class="o">=</span> <span class="k">new</span> 
<span class="n">ArrayList</span><span class="o">&lt;</span><span 
class="n">StructField</span><span class="o">&gt;();</span>
-<span class="n">fields</span><span class="o">.</span><span 
class="na">add</span><span class="o">(</span><span 
class="n">DataTypes</span><span class="o">.</span><span 
class="na">createStructField</span><span class="o">(</span><span 
class="s">&quot;line&quot;</span><span class="o">,</span> <span 
class="n">DataTypes</span><span class="o">.</span><span 
class="na">StringType</span><span class="o">,</span> <span 
class="kc">true</span><span class="o">));</span>
+<span class="n">JavaRDD</span><span class="o">&lt;</span><span 
class="n">Row</span><span class="o">&gt;</span> <span class="n">rowRDD</span> 
<span class="o">=</span> <span class="n">textFile</span><span 
class="o">.</span><span class="na">map</span><span class="o">(</span><span 
class="n">RowFactory</span><span class="o">::</span><span 
class="n">create</span><span class="o">);</span>
+<span class="n">List</span><span class="o">&lt;</span><span 
class="n">StructField</span><span class="o">&gt;</span> <span 
class="n">fields</span> <span class="o">=</span> <span 
class="n">Arrays</span><span class="o">.</span><span 
class="na">asList</span><span class="o">(</span>
+  <span class="n">DataTypes</span><span class="o">.</span><span 
class="na">createStructField</span><span class="o">(</span><span 
class="s">&quot;line&quot;</span><span class="o">,</span> <span 
class="n">DataTypes</span><span class="o">.</span><span 
class="na">StringType</span><span class="o">,</span> <span 
class="kc">true</span><span class="o">));</span>
 <span class="n">StructType</span> <span class="n">schema</span> <span 
class="o">=</span> <span class="n">DataTypes</span><span 
class="o">.</span><span class="na">createStructType</span><span 
class="o">(</span><span class="n">fields</span><span class="o">);</span>
 <span class="n">DataFrame</span> <span class="n">df</span> <span 
class="o">=</span> <span class="n">sqlContext</span><span 
class="o">.</span><span class="na">createDataFrame</span><span 
class="o">(</span><span class="n">rowRDD</span><span class="o">,</span> <span 
class="n">schema</span><span class="o">);</span>
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to