http://git-wip-us.apache.org/repos/asf/spark-website/blob/da71a5c1/site/docs/2.1.3/api/python/pyspark.sql.html ---------------------------------------------------------------------- diff --git a/site/docs/2.1.3/api/python/pyspark.sql.html b/site/docs/2.1.3/api/python/pyspark.sql.html index 329ea36..446f743 100644 --- a/site/docs/2.1.3/api/python/pyspark.sql.html +++ b/site/docs/2.1.3/api/python/pyspark.sql.html @@ -201,7 +201,7 @@ cluster.</p> <dl class="attribute"> <dt id="pyspark.sql.SparkSession.builder"> -<code class="descname">builder</code><em class="property"> = <pyspark.sql.session.Builder object></em><a class="headerlink" href="#pyspark.sql.SparkSession.builder" title="Permalink to this definition">¶</a></dt> +<code class="descname">builder</code><em class="property"> = <pyspark.sql.session.SparkSession.Builder object></em><a class="headerlink" href="#pyspark.sql.SparkSession.builder" title="Permalink to this definition">¶</a></dt> <dd></dd></dl> <dl class="attribute"> @@ -270,22 +270,22 @@ omit the <code class="docutils literal notranslate"><span class="pre">struct< </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">l</span> <span class="o">=</span> <span class="p">[(</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)]</span> <span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(_1=u'Alice', _2=1)]</span> +<span class="go">[Row(_1='Alice', _2=1)]</span> <span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="p">[</span><span 
class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">d</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'Alice'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">:</span> <span class="mi">1</span><span class="p">}]</span> <span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=1, name=u'Alice')]</span> +<span class="go">[Row(age=1, name='Alice')]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(_1=u'Alice', _2=1)]</span> +<span class="go">[Row(_1='Alice', _2=1)]</span> <span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span 
class="n">rdd</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span> @@ -293,7 +293,7 @@ omit the <code class="docutils literal notranslate"><span class="pre">struct< <span class="gp">>>> </span><span class="n">person</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">Person</span><span class="p">(</span><span class="o">*</span><span class="n">r</span><span class="p">))</span> <span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">person</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="o">*</span> @@ -302,17 +302,17 @@ omit the <code class="docutils literal notranslate"><span 
class="pre">struct< <span class="gp">... </span> <span class="n">StructField</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">)])</span> <span class="gp">>>> </span><span class="n">df3</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df3</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toPandas</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> <span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">pandas</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="go">[Row(0=1, 1=2)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">spark</span><span 
class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">"a: string, b: int"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(a=u'Alice', b=1)]</span> +<span class="go">[Row(a='Alice', b=1)]</span> <span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">"int"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="go">[Row(value=1)]</span> @@ -439,7 +439,7 @@ as a streaming <a class="reference internal" href="#pyspark.sql.DataFrame" title <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">createOrReplaceTempView</span><span class="p">(</span><span class="s2">"table1"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT field1 AS f1, field2 as f2 from table1"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]</span> 
+<span class="go">[Row(f1=1, f2='row1'), Row(f1=2, f2='row2'), Row(f1=3, f2='row3')]</span> </pre></div> </div> <div class="versionadded"> @@ -620,22 +620,22 @@ If itâs not a <a class="reference internal" href="#pyspark.sql.types.StructTyp </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">l</span> <span class="o">=</span> <span class="p">[(</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)]</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(_1=u'Alice', _2=1)]</span> +<span class="go">[Row(_1='Alice', _2=1)]</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">d</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'Alice'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">:</span> <span class="mi">1</span><span class="p">}]</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span 
class="p">(</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=1, name=u'Alice')]</span> +<span class="go">[Row(age=1, name='Alice')]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(_1=u'Alice', _2=1)]</span> +<span class="go">[Row(_1='Alice', _2=1)]</span> <span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span> @@ -643,7 +643,7 @@ If itâs not a <a class="reference internal" href="#pyspark.sql.types.StructTyp <span class="gp">>>> </span><span class="n">person</span> 
<span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">Person</span><span class="p">(</span><span class="o">*</span><span class="n">r</span><span class="p">))</span> <span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">person</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="o">*</span> @@ -652,17 +652,17 @@ If itâs not a <a class="reference internal" href="#pyspark.sql.types.StructTyp <span class="gp">... 
</span> <span class="n">StructField</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">)])</span> <span class="gp">>>> </span><span class="n">df3</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df3</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toPandas</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=1)]</span> +<span class="go">[Row(name='Alice', age=1)]</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">pandas</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="go">[Row(0=1, 1=2)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span 
class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">"a: string, b: int"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(a=u'Alice', b=1)]</span> +<span class="go">[Row(a='Alice', b=1)]</span> <span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">"int"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="go">[Row(value=1)]</span> @@ -721,12 +721,12 @@ created external table.</p> defaultValue. 
If the key is not set and defaultValue is None, return the system default value.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">"spark.sql.shuffle.partitions"</span><span class="p">)</span> -<span class="go">u'200'</span> +<span class="go">'200'</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">"spark.sql.shuffle.partitions"</span><span class="p">,</span> <span class="sa">u</span><span class="s2">"10"</span><span class="p">)</span> -<span class="go">u'10'</span> +<span class="go">'10'</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">setConf</span><span class="p">(</span><span class="s2">"spark.sql.shuffle.partitions"</span><span class="p">,</span> <span class="sa">u</span><span class="s2">"50"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">"spark.sql.shuffle.partitions"</span><span class="p">,</span> <span class="sa">u</span><span class="s2">"10"</span><span class="p">)</span> -<span class="go">u'50'</span> +<span class="go">'50'</span> </pre></div> </div> <div class="versionadded"> @@ -880,7 +880,7 @@ be done. 
For any other return type, the produced object must match the specifie </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerFunction</span><span class="p">(</span><span class="s2">"stringLengthString"</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT stringLengthString('test')"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(stringLengthString(test)=u'4')]</span> +<span class="go">[Row(stringLengthString(test)='4')]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="n">IntegerType</span> @@ -948,7 +948,7 @@ When the return type is not specified we would infer it via reflection. 
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerDataFrameAsTable</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="s2">"table1"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT field1 AS f1, field2 as f2 from table1"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]</span> +<span class="go">[Row(f1=1, f2='row1'), Row(f1=2, f2='row2'), Row(f1=3, f2='row3')]</span> </pre></div> </div> <div class="versionadded"> @@ -1039,7 +1039,7 @@ When the return type is not specified we would infer it via reflection. 
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerDataFrameAsTable</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="s2">"table1"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span> <span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="s2">"tableName = 'table1'"</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span> -<span class="go">Row(database=u'', tableName=u'table1', isTemporary=True)</span> +<span class="go">Row(database='', tableName='table1', isTemporary=True)</span> </pre></div> </div> <div class="versionadded"> @@ -1137,7 +1137,7 @@ be done. 
For any other return type, the produced object must match the specifie </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerFunction</span><span class="p">(</span><span class="s2">"stringLengthString"</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> <span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT stringLengthString('test')"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(stringLengthString(test)=u'4')]</span> +<span class="go">[Row(stringLengthString(test)='4')]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="n">IntegerType</span> @@ -1212,7 +1212,7 @@ and can be created using various functions in <a class="reference internal" href <span class="gp">>>> </span><span class="n">df_as2</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"df_as2"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">joined_df</span> <span class="o">=</span> <span class="n">df_as1</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df_as2</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">"df_as1.name"</span><span class="p">)</span> <span class="o">==</span> <span 
class="n">col</span><span class="p">(</span><span class="s2">"df_as2.name"</span><span class="p">),</span> <span class="s1">'inner'</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">joined_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"df_as1.name"</span><span class="p">,</span> <span class="s2">"df_as2.name"</span><span class="p">,</span> <span class="s2">"df_as2.age"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Bob', name=u'Bob', age=5), Row(name=u'Alice', name=u'Alice', age=2)]</span> +<span class="go">[Row(name='Bob', name='Bob', age=5), Row(name='Alice', name='Alice', age=2)]</span> </pre></div> </div> <div class="versionadded"> @@ -1329,7 +1329,7 @@ the current partitioning is).</p> <code class="descname">collect</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.collect"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.collect" title="Permalink to this definition">¶</a></dt> <dd><p>Returns all the records as a list of <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span> </pre></div> </div> <div class="versionadded"> @@ -1489,12 +1489,12 @@ catalog.</p> </tbody> </table> <div class="highlight-default notranslate"><div 
class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span> <span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"name"</span><span class="p">,</span> <span class="s2">"height"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Tom', height=80), Row(name=u'Bob', height=85)]</span> +<span class="go">[Row(name='Tom', height=80), Row(name='Bob', height=85)]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">crossJoin</span><span class="p">(</span><span class="n">df2</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"height"</span><span class="p">))</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">,</span> <span class="s2">"height"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice', height=80), Row(age=2, name=u'Alice', height=85),</span> -<span class="go"> Row(age=5, name=u'Bob', height=80), Row(age=5, name=u'Bob', height=85)]</span> +<span class="go">[Row(age=2, name='Alice', height=80), Row(age=2, name='Alice', height=85),</span> +<span class="go"> Row(age=5, name='Bob', height=80), Row(age=5, name='Bob', 
height=85)]</span> </pre></div> </div> <div class="versionadded"> @@ -1621,23 +1621,23 @@ This is a no-op if schema doesnât contain the given column name(s).</p> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">'age'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice'), Row(name=u'Bob')]</span> +<span class="go">[Row(name='Alice'), Row(name='Bob')]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice'), Row(name=u'Bob')]</span> +<span class="go">[Row(name='Alice'), Row(name='Bob')]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'inner'</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span 
class="go">[Row(age=5, height=85, name=u'Bob')]</span> +<span class="go">[Row(age=5, height=85, name='Bob')]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'inner'</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob', height=85)]</span> +<span class="go">[Row(age=5, name='Bob', height=85)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">'name'</span><span class="p">,</span> <span class="s1">'inner'</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">'age'</span><span class="p">,</span> <span class="s1">'height'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Bob')]</span> +<span class="go">[Row(name='Bob')]</span> </pre></div> </div> <div class="versionadded"> @@ -1834,15 +1834,15 @@ or a string of SQL expression.</td> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span 
class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">></span> <span class="mi">3</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=5, name='Bob')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=2, name='Alice')]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="s2">"age > 3"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=5, name='Bob')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="s2">"age = 2"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=2, name='Alice')]</span> </pre></div> </div> <div class="versionadded"> @@ -1855,7 +1855,7 @@ or a string of SQL expression.</td> <code class="descname">first</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" 
href="_modules/pyspark/sql/dataframe.html#DataFrame.first"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.first" title="Permalink to this definition">¶</a></dt> <dd><p>Returns the first row as a <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">first</span><span class="p">()</span> -<span class="go">Row(age=2, name=u'Alice')</span> +<span class="go">Row(age=2, name='Alice')</span> </pre></div> </div> <div class="versionadded"> @@ -1944,11 +1944,11 @@ Each element should be a column name (string) or an expression (<a class="refere <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">avg</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="go">[Row(avg(age)=3.5)]</span> <span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s1">'age'</span><span class="p">:</span> <span class="s1">'mean'</span><span class="p">})</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span> -<span class="go">[Row(name=u'Alice', avg(age)=2.0), Row(name=u'Bob', avg(age)=5.0)]</span> +<span class="go">[Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]</span> 
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">avg</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span> -<span class="go">[Row(name=u'Alice', avg(age)=2.0), Row(name=u'Bob', avg(age)=5.0)]</span> +<span class="go">[Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]</span> <span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">([</span><span class="s1">'name'</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">])</span><span class="o">.</span><span class="n">count</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span> -<span class="go">[Row(name=u'Alice', age=2, count=1), Row(name=u'Bob', age=5, count=1)]</span> +<span class="go">[Row(name='Alice', age=2, count=1), Row(name='Bob', age=5, count=1)]</span> </pre></div> </div> <div class="versionadded"> @@ -1986,9 +1986,9 @@ If n is 1, return a single Row.</td> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">()</span> -<span class="go">Row(age=2, name=u'Alice')</span> +<span class="go">Row(age=2, name='Alice')</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> -<span class="go">[Row(age=2, name=u'Alice')]</span> +<span 
class="go">[Row(age=2, name='Alice')]</span> </pre></div> </div> <div class="versionadded"> @@ -2059,24 +2059,24 @@ the column(s) must exist on both sides, and this performs an equi-join.</li> </table> <p>The following performs a full outer join between <code class="docutils literal notranslate"><span class="pre">df1</span></code> and <code class="docutils literal notranslate"><span class="pre">df2</span></code>.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'outer'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df2</span><span class="o">.</span><span class="n">height</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=None, height=80), Row(name=u'Bob', height=85), Row(name=u'Alice', height=None)]</span> +<span class="go">[Row(name=None, height=80), Row(name='Bob', height=85), Row(name='Alice', height=None)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">'name'</span><span class="p">,</span> <span class="s1">'outer'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span 
class="p">(</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'height'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Tom', height=80), Row(name=u'Bob', height=85), Row(name=u'Alice', height=None)]</span> +<span class="go">[Row(name='Tom', height=80), Row(name='Bob', height=85), Row(name='Alice', height=None)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">cond</span> <span class="o">=</span> <span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df3</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">==</span> <span class="n">df3</span><span class="o">.</span><span class="n">age</span><span class="p">]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df3</span><span class="p">,</span> <span class="n">cond</span><span class="p">,</span> <span class="s1">'outer'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df3</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]</span> +<span class="go">[Row(name='Alice', age=2), Row(name='Bob', age=5)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span 
class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">'name'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df2</span><span class="o">.</span><span class="n">height</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Bob', height=85)]</span> +<span class="go">[Row(name='Bob', height=85)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df4</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Bob', age=5)]</span> +<span class="go">[Row(name='Bob', age=5)]</span> </pre></div> </div> <div class="versionadded"> @@ -2089,7 +2089,7 @@ the column(s) must exist on both sides, and this performs an equi-join.</li> <code class="descname">limit</code><span class="sig-paren">(</span><em>num</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.limit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.limit" 
title="Permalink to this definition">¶</a></dt> <dd><p>Limits the result count to the number specified.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="go">[]</span> </pre></div> @@ -2127,18 +2127,18 @@ If a list is specified, length of the list must equal length of the <cite>cols</ </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, 
name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.functions</span> <span class="k">import</span> <span class="o">*</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">asc</span><span class="p">(</span><span class="s2">"age"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">desc</span><span class="p">(</span><span class="s2">"age"</span><span class="p">),</span> <span class="s2">"name"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">([</span><span class="s2">"age"</span><span class="p">,</span> <span class="s2">"name"</span><span 
class="p">],</span> <span class="n">ascending</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> </pre></div> </div> <div class="versionadded"> @@ -2455,11 +2455,11 @@ in the current DataFrame.</td> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">'*'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]</span> +<span class="go">[Row(name='Alice', age=2), Row(name='Bob', age=5)]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">+</span> <span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span 
class="p">(</span><span class="s1">'age'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(name=u'Alice', age=12), Row(name=u'Bob', age=15)]</span> +<span class="go">[Row(name='Alice', age=12), Row(name='Bob', age=15)]</span> </pre></div> </div> <div class="versionadded"> @@ -2541,18 +2541,18 @@ If a list is specified, length of the list must equal length of the <cite>cols</ </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, 
name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.functions</span> <span class="k">import</span> <span class="o">*</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">asc</span><span class="p">(</span><span class="s2">"age"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">desc</span><span class="p">(</span><span class="s2">"age"</span><span class="p">),</span> <span class="s2">"name"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">([</span><span class="s2">"age"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">],</span> <span class="n">ascending</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span> </pre></div> </div> <div class="versionadded"> @@ -2634,7 +2634,7 @@ 
but not in another frame.</p> <code class="descname">take</code><span class="sig-paren">(</span><em>num</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.take"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.take" title="Permalink to this definition">¶</a></dt> <dd><p>Returns the first <code class="docutils literal notranslate"><span class="pre">num</span></code> rows as a <code class="xref py py-class docutils literal notranslate"><span class="pre">list</span></code> of <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> -<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span> </pre></div> </div> <div class="versionadded"> @@ -2655,7 +2655,7 @@ but not in another frame.</p> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">toDF</span><span class="p">(</span><span class="s1">'f1'</span><span class="p">,</span> <span class="s1">'f2'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(f1=2, f2=u'Alice'), Row(f1=5, f2=u'Bob')]</span> +<span class="go">[Row(f1=2, f2='Alice'), Row(f1=5, f2='Bob')]</span> </pre></div> </div> </dd></dl> @@ -2666,7 +2666,7 @@ but not in another frame.</p> <dd><p>Converts a <a class="reference internal" 
href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> into a <code class="xref py py-class docutils literal notranslate"><span class="pre">RDD</span></code> of string.</p> <p>Each row is turned into a JSON document as one element in the returned RDD.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">toJSON</span><span class="p">()</span><span class="o">.</span><span class="n">first</span><span class="p">()</span> -<span class="go">u'{"age":2,"name":"Alice"}'</span> +<span class="go">'{"age":2,"name":"Alice"}'</span> </pre></div> </div> <div class="versionadded"> @@ -2680,7 +2680,7 @@ but not in another frame.</p> <dd><p>Returns an iterator that contains all of the rows in this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>. 
The iterator will consume as much memory as the largest partition in this DataFrame.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toLocalIterator</span><span class="p">())</span> -<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span> </pre></div> </div> <div class="versionadded"> @@ -2776,7 +2776,7 @@ existing column that has the same name.</p> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s1">'age2'</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">+</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice', age2=4), Row(age=5, name=u'Bob', age2=7)]</span> +<span class="go">[Row(age=2, name='Alice', age2=4), Row(age=5, name='Bob', age2=7)]</span> </pre></div> </div> <div class="versionadded"> @@ -2802,7 +2802,7 @@ This is a no-op if schema doesn’t contain the given column name.</p> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">withColumnRenamed</span><span class="p">(</span><span class="s1">'age'</span><span class="p">,</span> <span class="s1">'age2'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age2=2, name=u'Alice'), Row(age2=5, name=u'Bob')]</span> +<span
class="go">[Row(age2=2, name='Alice'), Row(age2=5, name='Bob')]</span> </pre></div> </div> <div class="versionadded"> @@ -2929,12 +2929,12 @@ or a list of <a class="reference internal" href="#pyspark.sql.Column" title="pys </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">gdf</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> <span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">gdf</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s2">"*"</span><span class="p">:</span> <span class="s2">"count"</span><span class="p">})</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span> -<span class="go">[Row(name=u'Alice', count(1)=1), Row(name=u'Bob', count(1)=1)]</span> +<span class="go">[Row(name='Alice', count(1)=1), Row(name='Bob', count(1)=1)]</span> </pre></div> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span> <span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">gdf</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span> -<span class="go">[Row(name=u'Alice', min(age)=2), Row(name=u'Bob', 
min(age)=5)]</span> +<span class="go">[Row(name='Alice', min(age)=2), Row(name='Bob', min(age)=5)]</span> </pre></div> </div> <div class="versionadded"> @@ -3189,9 +3189,9 @@ expression is between the given columns.</p> <code class="descname">cast</code><span class="sig-paren">(</span><em>dataType</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/column.html#Column.cast"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.Column.cast" title="Permalink to this definition">¶</a></dt> <dd><p>Convert the column into type <code class="docutils literal notranslate"><span class="pre">dataType</span></code>.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">"string"</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'ages'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(ages=u'2'), Row(ages=u'5')]</span> +<span class="go">[Row(ages='2'), Row(ages='5')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">StringType</span><span class="p">())</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'ages'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span 
class="go">[Row(ages=u'2'), Row(ages=u'5')]</span> +<span class="go">[Row(ages='2'), Row(ages='5')]</span> </pre></div> </div> <div class="versionadded"> @@ -3279,9 +3279,9 @@ or gets an item by key out of a dict.</p> <dd><p>A boolean expression that is evaluated to true if the value of this expression is contained by the evaluated values of the arguments.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="s2">"Bob"</span><span class="p">,</span> <span class="s2">"Mike"</span><span class="p">)]</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=5, name=u'Bob')]</span> +<span class="go">[Row(age=5, name='Bob')]</span> <span class="gp">>>> </span><span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])]</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(age=2, name=u'Alice')]</span> +<span class="go">[Row(age=2, name='Alice')]</span> </pre></div> </div> <div class="versionadded"> @@ -3387,7 +3387,7 @@ If <a class="reference internal" href="#pyspark.sql.Column.otherwise" title="pys </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span 
class="o">.</span><span class="n">substr</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"col"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(col=u'Ali'), Row(col=u'Bob')]</span> +<span class="go">[Row(col='Ali'), Row(col='Bob')]</span> </pre></div> </div> <div class="versionadded"> @@ -3950,12 +3950,12 @@ any value greater than or equal to 9223372036854775807.</li> <dl class="attribute"> <dt id="pyspark.sql.Window.unboundedFollowing"> -<code class="descname">unboundedFollowing</code><em class="property"> = 9223372036854775807L</em><a class="headerlink" href="#pyspark.sql.Window.unboundedFollowing" title="Permalink to this definition">¶</a></dt> +<code class="descname">unboundedFollowing</code><em class="property"> = 9223372036854775807</em><a class="headerlink" href="#pyspark.sql.Window.unboundedFollowing" title="Permalink to this definition">¶</a></dt> <dd></dd></dl> <dl class="attribute"> <dt id="pyspark.sql.Window.unboundedPreceding"> -<code class="descname">unboundedPreceding</code><em class="property"> = -9223372036854775808L</em><a class="headerlink" href="#pyspark.sql.Window.unboundedPreceding" title="Permalink to this definition">¶</a></dt> +<code class="descname">unboundedPreceding</code><em class="property"> = -9223372036854775808</em><a class="headerlink" href="#pyspark.sql.Window.unboundedPreceding" title="Permalink to this definition">¶</a></dt> <dd></dd></dl> </dd></dl> @@ -4463,7 +4463,7 @@ are any.</p> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span 
class="n">text</span><span class="p">(</span><span class="s1">'python/test_support/sql/text-test.txt'</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(value=u'hello'), Row(value=u'this')]</span> +<span class="go">[Row(value='hello'), Row(value='this')]</span> </pre></div> </div> <div class="versionadded"> @@ -5544,7 +5544,7 @@ elements and value must be of the same type.</p> <code class="descclassname">pyspark.sql.functions.</code><code class="descname">bin</code><span class="sig-paren">(</span><em>col</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/functions.html#bin"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.functions.bin" title="Permalink to this definition">¶</a></dt> <dd><p>Returns the string representation of the binary value of the given column.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="nb">bin</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'c'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(c=u'10'), Row(c=u'101')]</span> +<span class="go">[Row(c='10'), Row(c='101')]</span> </pre></div> </div> <div class="versionadded"> @@ -5684,7 +5684,7 @@ or at integral part when <cite>scale</cite> < 0.</p> <dd><p>Concatenates multiple input string columns together into a single string column.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> 
</span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s1">'abcd'</span><span class="p">,</span><span class="s1">'123'</span><span class="p">)],</span> <span class="p">[</span><span class="s1">'s'</span><span class="p">,</span> <span class="s1">'d'</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">concat</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">s</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'s'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(s=u'abcd123')]</span> +<span class="go">[Row(s='abcd123')]</span> </pre></div> </div> <div class="versionadded"> @@ -5699,7 +5699,7 @@ or at integral part when <cite>scale</cite> < 0.</p> using the given separator.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s1">'abcd'</span><span class="p">,</span><span class="s1">'123'</span><span class="p">)],</span> <span class="p">[</span><span class="s1">'s'</span><span class="p">,</span> <span class="s1">'d'</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">concat_ws</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> 
<span class="n">df</span><span class="o">.</span><span class="n">s</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'s'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(s=u'abcd-123')]</span> +<span class="go">[Row(s='abcd-123')]</span> </pre></div> </div> <div class="versionadded"> @@ -5713,7 +5713,7 @@ using the given separator.</p> <dd><p>Convert a number in a string column from one base to another.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s2">"010101"</span><span class="p">,)],</span> <span class="p">[</span><span class="s1">'n'</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">conv</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">n</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">16</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'hex'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(hex=u'15')]</span> +<span class="go">[Row(hex='15')]</span> </pre></div> </div> <div class="versionadded"> @@ -5844,9 +5844,9 @@ as key-value pairs, e.g. 
(key1, value1, key2, value2, â¦).</td> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">create_map</span><span class="p">(</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"map"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(map={u'Alice': 2}), Row(map={u'Bob': 5})]</span> +<span class="go">[Row(map={'Alice': 2}), Row(map={'Bob': 5})]</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">create_map</span><span class="p">([</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">])</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"map"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(map={u'Alice': 2}), Row(map={u'Bob': 5})]</span> +<span class="go">[Row(map={'Alice': 2}), Row(map={'Bob': 5})]</span> </pre></div> </div> <div class="versionadded"> @@ -5907,7 +5907,7 @@ specialized implementation.</p> </div> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s1">'2015-04-08'</span><span class="p">,)],</span> <span 
class="p">[</span><span class="s1">'a'</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">date_format</span><span class="p">(</span><span class="s1">'a'</span><span class="p">,</span> <span class="s1">'MM/dd/yyy'</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'date'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(date=u'04/08/2015')]</span> +<span class="go">[Row(date='04/08/2015')]</span> </pre></div> </div> <div class="versionadded"> @@ -6130,7 +6130,7 @@ and returns the result as a string.</p> </tbody> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="mi">5</span><span class="p">,)],</span> <span class="p">[</span><span class="s1">'a'</span><span class="p">])</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">format_number</span><span class="p">(</span><span class="s1">'a'</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'v'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(v=u'5.0000')]</span> +<span class="go">[Row(v='5.0000')]</span> </pre></div> </div> <div class="versionadded"> @@ -6156,7 +6156,7 @@ and returns the result as a string.</p> </table> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span 
class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="mi">5</span><span class="p">,</span> <span class="s2">"hello"</span><span class="p">)],</span> <span class="p">[</span><span class="s1">'a'</span><span class="p">,</span> <span class="s1">'b'</span><span class="p">])</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">format_string</span><span class="p">(</span><span class="s1">'</span><span class="si">%d</span><span class="s1"> </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">a</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'v'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(v=u'5 hello')]</span> +<span class="go">[Row(v='5 hello')]</span> </pre></div> </div> <div class="versionadded"> @@ -6242,7 +6242,7 @@ of the extracted json object. 
It will return null if the input json string is in <span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="p">(</span><span class="s2">"key"</span><span class="p">,</span> <span class="s2">"jstring"</span><span class="p">))</span> <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">key</span><span class="p">,</span> <span class="n">get_json_object</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">jstring</span><span class="p">,</span> <span class="s1">'$.f1'</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"c0"</span><span class="p">),</span> \ <span class="gp">... 
</span> <span class="n">get_json_object</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">jstring</span><span class="p">,</span> <span class="s1">'$.f2'</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"c1"</span><span class="p">)</span> <span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(key=u'1', c0=u'value1', c1=u'value2'), Row(key=u'2', c0=u'value12', c1=None)]</span> +<span class="go">[Row(key='1', c0='value1', c1='value2'), Row(key='2', c0='value12', c1=None)]</span> </pre></div> </div> <div class="versionadded"> @@ -6331,7 +6331,7 @@ the grouping columns).</p> <a class="reference internal" href="#pyspark.sql.types.BinaryType" title="pyspark.sql.types.BinaryType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.BinaryType</span></code></a>, <a class="reference internal" href="#pyspark.sql.types.IntegerType" title="pyspark.sql.types.IntegerType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.IntegerType</span></code></a> or <a class="reference internal" href="#pyspark.sql.types.LongType" title="pyspark.sql.types.LongType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.LongType</span></code></a>.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s1">'ABC'</span><span class="p">,</span> <span class="mi">3</span><span class="p">)],</span> <span class="p">[</span><span class="s1">'a'</span><span class="p">,</span> <span class="s1">'b'</span><span class="p">])</span><span class="o">.</span><span class="n">select</span><span 
class="p">(</span><span class="nb">hex</span><span class="p">(</span><span class="s1">'a'</span><span class="p">),</span> <span class="nb">hex</span><span class="p">(</span><span class="s1">'b'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> -<span class="go">[Row(hex(a)=u'414243', hex(b)=u'3')]</span> +<span class="go">[Row(hex(a)='414243', hex(b)='3')]</span> </pre></div> </div> <div class="versionadded"> @@ -6367,7 +6367,7 @@ the grouping columns).</p> <code class="descclassname">pyspark.sql.functions.</code><code class="descname">initcap</code><span class="sig-paren">(</span><em>col</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/functions.html#initcap"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.functions.initcap" title="Permalink to this definition">¶</a></dt> <dd><p>Translate the first letter of each word to upper case in the sentence.</p> <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s1">'ab cd'</span><span class="p">,)],</span> <span class="p">[</span><span class="s1">'a'</span><span class="p">])</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">initcap</span><span class="p">(</span><span class="s2">"a"</span><span class="p">)</span><span class="o">.</span><spa
<TRUNCATED> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org