spark git commit: [SPARK-15850][SQL] Remove function grouping in SparkSession

hvanhovell Thu, 09 Jun 2016 18:58:43 -0700

Repository: spark
Updated Branches:
  refs/heads/master 4d9d9cc58 -> 16df133d7



[SPARK-15850][SQL] Remove function grouping in SparkSession

## What changes were proposed in this pull request?
SparkSession does not have that many functions due to better namespacing, and 
as a result we probably don't need the function grouping. This patch removes 
the grouping and also adds missing scaladocs for createDataset functions in 
SQLContext.

Closes #13577.

## How was this patch tested?
N/A - this is a documentation change.

Author: Reynold Xin <[email protected]>

Closes #13582 from rxin/SPARK-15850.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/16df133d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/16df133d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/16df133d

Branch: refs/heads/master
Commit: 16df133d7f5f3115cd5baa696fa73a4694f9cba9
Parents: 4d9d9cc
Author: Reynold Xin <[email protected]>
Authored: Thu Jun 9 18:58:24 2016 -0700
Committer: Herman van Hovell <[email protected]>
Committed: Thu Jun 9 18:58:24 2016 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/SQLContext.scala | 62 +++++++++++++++++++-
 .../org/apache/spark/sql/SparkSession.scala     | 28 ---------
 .../scala/org/apache/spark/sql/functions.scala  |  2 +-
 3 files changed, 61 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/16df133d/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 0fb2400..23f2b6e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -51,7 +51,7 @@ import org.apache.spark.sql.util.ExecutionListenerManager
  * @groupname specificdata Specific Data Sources
  * @groupname config Configuration
  * @groupname dataframes Custom DataFrame Creation
- * @groupname dataset Custom DataFrame Creation
+ * @groupname dataset Custom Dataset Creation
  * @groupname Ungrouped Support functions for language integrated queries
  * @since 1.0.0
  */
@@ -346,15 +346,73 @@ class SQLContext private[sql](val sparkSession: 
SparkSession)
     sparkSession.createDataFrame(rowRDD, schema, needsConversion)
   }
 
-
+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from a local Seq of data of a given type. This 
method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal 
Spark SQL representation)
+   * that is generally created automatically through implicits from a 
`SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * == Example ==
+   *
+   * {{{
+   *
+   *   import spark.implicits._
+   *   case class Person(name: String, age: Long)
+   *   val data = Seq(Person("Michael", 29), Person("Andy", 30), 
Person("Justin", 19))
+   *   val ds = spark.createDataset(data)
+   *
+   *   ds.show()
+   *   // +-------+---+
+   *   // |   name|age|
+   *   // +-------+---+
+   *   // |Michael| 29|
+   *   // |   Andy| 30|
+   *   // | Justin| 19|
+   *   // +-------+---+
+   * }}}
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
     sparkSession.createDataset(data)
   }
 
+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from an RDD of a given type. This method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal 
Spark SQL representation)
+   * that is generally created automatically through implicits from a 
`SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
     sparkSession.createDataset(data)
   }
 
+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from a [[java.util.List]] of a given type. This 
method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal 
Spark SQL representation)
+   * that is generally created automatically through implicits from a 
`SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * == Java Example ==
+   *
+   * {{{
+   *     List<String> data = Arrays.asList("hello", "world");
+   *     Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
+   * }}}
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
     sparkSession.createDataset(data)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/16df133d/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index f5b16d0..01c2e3a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -119,7 +119,6 @@ class SparkSession private(
    * configurations that are relevant to Spark SQL. When getting the value of 
a config,
    * this defaults to the value set in the underlying [[SparkContext]], if any.
    *
-   * @group config
    * @since 2.0.0
    */
   @transient lazy val conf: RuntimeConfig = new 
RuntimeConfig(sessionState.conf)
@@ -129,7 +128,6 @@ class SparkSession private(
    * An interface to register custom 
[[org.apache.spark.sql.util.QueryExecutionListener]]s
    * that listen for execution metrics.
    *
-   * @group basic
    * @since 2.0.0
    */
   @Experimental
@@ -140,7 +138,6 @@ class SparkSession private(
    * A collection of methods that are considered experimental, but can be used 
to hook into
    * the query planner for advanced functionality.
    *
-   * @group basic
    * @since 2.0.0
    */
   @Experimental
@@ -175,7 +172,6 @@ class SparkSession private(
    *       DataTypes.StringType);
    * }}}
    *
-   * @group basic
    * @since 2.0.0
    */
   def udf: UDFRegistration = sessionState.udf
@@ -185,7 +181,6 @@ class SparkSession private(
    * Returns a [[ContinuousQueryManager]] that allows managing all the
    * [[ContinuousQuery ContinuousQueries]] active on `this`.
    *
-   * @group basic
    * @since 2.0.0
    */
   @Experimental
@@ -200,7 +195,6 @@ class SparkSession private(
    * and child sessions are set up with the same shared state. If the 
underlying catalog
    * implementation is Hive, this will initialize the metastore, which may 
take some time.
    *
-   * @group basic
    * @since 2.0.0
    */
   def newSession(): SparkSession = {
@@ -215,7 +209,6 @@ class SparkSession private(
   /**
    * Returns a [[DataFrame]] with no rows or columns.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @transient
@@ -239,7 +232,6 @@ class SparkSession private(
    * :: Experimental ::
    * Creates a [[DataFrame]] from an RDD of Product (e.g. case classes, 
tuples).
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @Experimental
@@ -255,7 +247,6 @@ class SparkSession private(
    * :: Experimental ::
    * Creates a [[DataFrame]] from a local Seq of Product.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @Experimental
@@ -295,7 +286,6 @@ class SparkSession private(
    *  sparkSession.sql("select name from people").collect.foreach(println)
    * }}}
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @DeveloperApi
@@ -309,7 +299,6 @@ class SparkSession private(
    * It is important to make sure that the structure of every [[Row]] of the 
provided RDD matches
    * the provided schema. Otherwise, there will be runtime exception.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @DeveloperApi
@@ -323,7 +312,6 @@ class SparkSession private(
    * It is important to make sure that the structure of every [[Row]] of the 
provided List matches
    * the provided schema. Otherwise, there will be runtime exception.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @DeveloperApi
@@ -337,7 +325,6 @@ class SparkSession private(
    * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
    * SELECT * queries will return the columns in an undefined order.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
@@ -357,7 +344,6 @@ class SparkSession private(
    * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
    * SELECT * queries will return the columns in an undefined order.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
@@ -369,7 +355,6 @@ class SparkSession private(
    *
    * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
    *          SELECT * queries will return the columns in an undefined order.
-   * @group dataframes
    * @since 1.6.0
    */
   def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame 
= {
@@ -382,7 +367,6 @@ class SparkSession private(
   /**
    * Convert a [[BaseRelation]] created for external data sources into a 
[[DataFrame]].
    *
-   * @group dataframes
    * @since 2.0.0
    */
   def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = {
@@ -420,7 +404,6 @@ class SparkSession private(
    * }}}
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
@@ -439,7 +422,6 @@ class SparkSession private(
    * created explicitly by calling static methods on [[Encoders]].
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
@@ -465,7 +447,6 @@ class SparkSession private(
    * }}}
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
@@ -478,7 +459,6 @@ class SparkSession private(
    * in a range from 0 to `end` (exclusive) with step value 1.
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def range(end: Long): Dataset[java.lang.Long] = range(0, end)
@@ -489,7 +469,6 @@ class SparkSession private(
    * in a range from `start` to `end` (exclusive) with step value 1.
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def range(start: Long, end: Long): Dataset[java.lang.Long] = {
@@ -502,7 +481,6 @@ class SparkSession private(
    * in a range from `start` to `end` (exclusive) with a step value.
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = {
@@ -516,7 +494,6 @@ class SparkSession private(
    * specified.
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def range(start: Long, end: Long, step: Long, numPartitions: Int): 
Dataset[java.lang.Long] = {
@@ -565,7 +542,6 @@ class SparkSession private(
    * Interface through which the user may create, drop, alter or query 
underlying
    * databases, tables, functions etc.
    *
-   * @group ddl_ops
    * @since 2.0.0
    */
   @transient lazy val catalog: Catalog = new CatalogImpl(self)
@@ -573,7 +549,6 @@ class SparkSession private(
   /**
    * Returns the specified table as a [[DataFrame]].
    *
-   * @group ddl_ops
    * @since 2.0.0
    */
   def table(tableName: String): DataFrame = {
@@ -592,7 +567,6 @@ class SparkSession private(
    * Executes a SQL query using Spark, returning the result as a [[DataFrame]].
    * The dialect that is used for SQL parsing can be configured with 
'spark.sql.dialect'.
    *
-   * @group basic
    * @since 2.0.0
    */
   def sql(sqlText: String): DataFrame = {
@@ -606,7 +580,6 @@ class SparkSession private(
    *   sparkSession.read.schema(schema).json("/path/to/file.json")
    * }}}
    *
-   * @group genericdata
    * @since 2.0.0
    */
   def read: DataFrameReader = new DataFrameReader(self)
@@ -624,7 +597,6 @@ class SparkSession private(
    *   import sparkSession.implicits._
    * }}}
    *
-   * @group basic
    * @since 2.0.0
    */
   @Experimental

http://git-wip-us.apache.org/repos/asf/spark/blob/16df133d/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 4dbd166..02608b0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2437,7 +2437,7 @@ object functions {
    */
   def minute(e: Column): Column = withExpr { Minute(e.expr) }
 
-  /*
+  /**
    * Returns number of months between dates `date1` and `date2`.
    * @group datetime_funcs
    * @since 1.5.0


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-15850][SQL] Remove function grouping in SparkSession

Reply via email to