http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala index d8405d1..4334b31 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala @@ -36,14 +36,14 @@ import org.apache.spark.mllib.tree.loss.{LogLoss, Loss, SquaredError} * @param validationTol validationTol is a condition which decides iteration termination when * runWithValidation is used. * The end of iteration is decided based on below logic: - * If the current loss on the validation set is > 0.01, the diff + * If the current loss on the validation set is greater than 0.01, the diff * of validation error is compared to relative tolerance which is * validationTol * (current loss on the validation set). - * If the current loss on the validation set is <= 0.01, the diff - * of validation error is compared to absolute tolerance which is + * If the current loss on the validation set is less than or equal to 0.01, + * the diff of validation error is compared to absolute tolerance which is * validationTol * 0.01. * Ignored when - * [[org.apache.spark.mllib.tree.GradientBoostedTrees.run()]] is used. + * `org.apache.spark.mllib.tree.GradientBoostedTrees.run()` is used. */ @Since("1.2.0") case class BoostingStrategy @Since("1.4.0") ( @@ -92,8 +92,8 @@ object BoostingStrategy { /** * Returns default configuration for the boosting algorithm * @param algo Learning goal. Supported: - * [[org.apache.spark.mllib.tree.configuration.Algo.Classification]], - * [[org.apache.spark.mllib.tree.configuration.Algo.Regression]] + * `org.apache.spark.mllib.tree.configuration.Algo.Classification`, + * `org.apache.spark.mllib.tree.configuration.Algo.Regression` * @return Configuration for boosting algorithm */ @Since("1.3.0")
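The validationTol wording above describes the stopping rule used by GradientBoostedTrees.runWithValidation: the drop in validation loss between iterations is compared against a relative tolerance (validationTol * current loss) when the current loss exceeds 0.01, and against an absolute tolerance (validationTol * 0.01) otherwise. A minimal sketch of that check, under the assumption that the helper name shouldStop and its parameters are illustrative only and not part of this patch or of Spark's internals:

{{{
import org.apache.spark.mllib.tree.configuration.BoostingStrategy

object ValidationTolSketch {
  // Sketch of the termination test described in the Scaladoc above; the real
  // implementation lives inside GradientBoostedTrees and may differ in detail.
  def shouldStop(previousLoss: Double, currentLoss: Double, validationTol: Double): Boolean = {
    val improvement = previousLoss - currentLoss
    val tolerance =
      if (currentLoss > 0.01) validationTol * currentLoss // relative tolerance
      else validationTol * 0.01                           // absolute tolerance
    improvement < tolerance
  }

  def main(args: Array[String]): Unit = {
    // defaultParams is the factory documented in the hunk above.
    val boostingStrategy = BoostingStrategy.defaultParams("Regression")
    println(boostingStrategy.validationTol)
    println(shouldStop(previousLoss = 0.250, currentLoss = 0.249, validationTol = 0.001))
  }
}
}}}

With validationTol left small, shouldStop only fires once the per-iteration improvement on the validation set becomes negligible relative to the current loss.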
http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index b34e1b1..58e8f5b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -28,8 +28,8 @@ import org.apache.spark.mllib.tree.impurity.{Entropy, Gini, Impurity, Variance} /** * Stores all the configuration options for tree construction * @param algo Learning goal. Supported: - * [[org.apache.spark.mllib.tree.configuration.Algo.Classification]], - * [[org.apache.spark.mllib.tree.configuration.Algo.Regression]] + * `org.apache.spark.mllib.tree.configuration.Algo.Classification`, + * `org.apache.spark.mllib.tree.configuration.Algo.Regression` * @param impurity Criterion used for information gain calculation. * Supported for Classification: [[org.apache.spark.mllib.tree.impurity.Gini]], * [[org.apache.spark.mllib.tree.impurity.Entropy]]. @@ -43,9 +43,9 @@ import org.apache.spark.mllib.tree.impurity.{Entropy, Gini, Impurity, Variance} * for choosing how to split on features at each node. * More bins give higher granularity. * @param quantileCalculationStrategy Algorithm for calculating quantiles. Supported: - * [[org.apache.spark.mllib.tree.configuration.QuantileStrategy.Sort]] + * `org.apache.spark.mllib.tree.configuration.QuantileStrategy.Sort` * @param categoricalFeaturesInfo A map storing information about the categorical variables and the - * number of discrete values they take. An entry (n -> k) + * number of discrete values they take. An entry (n to k) * indicates that feature n is categorical with k categories * indexed from 0: {0, 1, ..., k-1}. * @param minInstancesPerNode Minimum number of instances each child must have after split. http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index be2704d..bda5e66 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -25,7 +25,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType * Split applied to a feature * @param feature feature index * @param threshold Threshold for continuous feature. - * Split left if feature <= threshold, else right. + * Split left if feature is less than or equal to threshold, else right. * @param featureType type of feature -- categorical or continuous * @param categories Split left if categorical feature value is in this set, else right. 
*/ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/catalyst/src/main/scala/org/apache/spark/sql/InternalOutputModes.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/InternalOutputModes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/InternalOutputModes.scala index 153f9f5..594c41c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/InternalOutputModes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/InternalOutputModes.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql import org.apache.spark.sql.streaming.OutputMode /** - * Internal helper class to generate objects representing various [[OutputMode]]s, + * Internal helper class to generate objects representing various `OutputMode`s, */ private[sql] object InternalOutputModes { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index a821d2c..c362104 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -74,7 +74,7 @@ object Row { * It is invalid to use the native primitive interface to retrieve a value that is null, instead a * user must check `isNullAt` before attempting to retrieve a value that might be null. * - * To create a new Row, use [[RowFactory.create()]] in Java or [[Row.apply()]] in Scala. + * To create a new Row, use `RowFactory.create()` in Java or `Row.apply()` in Scala. * * A [[Row]] object can be constructed by providing field values. Example: * {{{ @@ -343,7 +343,7 @@ trait Row extends Serializable { } /** - * Returns a Map(name -> value) for the requested fieldNames + * Returns a Map consisting of names and values for the requested fieldNames * For primitive types if value is null it returns 'zero value' specific for primitive * ie. 0 for Int - use isNullAt to ensure that value is not null * http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala index cecad3b..4dc06fc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala @@ -92,7 +92,8 @@ case class DecimalType(precision: Int, scale: Int) extends FractionalType { } /** - * The default size of a value of the DecimalType is 8 bytes (precision <= 18) or 16 bytes. + * The default size of a value of the DecimalType is 8 bytes when precision is at most 18, + * and 16 bytes otherwise. 
*/ override def defaultSize: Int = if (precision <= Decimal.MAX_LONG_DIGITS) 8 else 16 http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/Column.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index fa3b2b9..e99d786 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -97,7 +97,7 @@ class TypedColumn[-T, U]( } /** - * A column that will be computed based on the data in a [[DataFrame]]. + * A column that will be computed based on the data in a `DataFrame`. * * A new column is constructed based on the input columns present in a dataframe: * @@ -801,7 +801,7 @@ class Column(val expr: Expression) extends Logging { /** * An expression that gets an item at position `ordinal` out of an array, - * or gets a value by key `key` in a [[MapType]]. + * or gets a value by key `key` in a `MapType`. * * @group expr_ops * @since 1.3.0 @@ -809,7 +809,7 @@ class Column(val expr: Expression) extends Logging { def getItem(key: Any): Column = withExpr { UnresolvedExtractValue(expr, Literal(key)) } /** - * An expression that gets a field by name in a [[StructType]]. + * An expression that gets a field by name in a `StructType`. * * @group expr_ops * @since 1.3.0 @@ -1195,92 +1195,92 @@ class Column(val expr: Expression) extends Logging { class ColumnName(name: String) extends Column(name) { /** - * Creates a new [[StructField]] of type boolean. + * Creates a new `StructField` of type boolean. * @since 1.3.0 */ def boolean: StructField = StructField(name, BooleanType) /** - * Creates a new [[StructField]] of type byte. + * Creates a new `StructField` of type byte. * @since 1.3.0 */ def byte: StructField = StructField(name, ByteType) /** - * Creates a new [[StructField]] of type short. + * Creates a new `StructField` of type short. * @since 1.3.0 */ def short: StructField = StructField(name, ShortType) /** - * Creates a new [[StructField]] of type int. + * Creates a new `StructField` of type int. * @since 1.3.0 */ def int: StructField = StructField(name, IntegerType) /** - * Creates a new [[StructField]] of type long. + * Creates a new `StructField` of type long. * @since 1.3.0 */ def long: StructField = StructField(name, LongType) /** - * Creates a new [[StructField]] of type float. + * Creates a new `StructField` of type float. * @since 1.3.0 */ def float: StructField = StructField(name, FloatType) /** - * Creates a new [[StructField]] of type double. + * Creates a new `StructField` of type double. * @since 1.3.0 */ def double: StructField = StructField(name, DoubleType) /** - * Creates a new [[StructField]] of type string. + * Creates a new `StructField` of type string. * @since 1.3.0 */ def string: StructField = StructField(name, StringType) /** - * Creates a new [[StructField]] of type date. + * Creates a new `StructField` of type date. * @since 1.3.0 */ def date: StructField = StructField(name, DateType) /** - * Creates a new [[StructField]] of type decimal. + * Creates a new `StructField` of type decimal. * @since 1.3.0 */ def decimal: StructField = StructField(name, DecimalType.USER_DEFAULT) /** - * Creates a new [[StructField]] of type decimal. + * Creates a new `StructField` of type decimal. 
* @since 1.3.0 */ def decimal(precision: Int, scale: Int): StructField = StructField(name, DecimalType(precision, scale)) /** - * Creates a new [[StructField]] of type timestamp. + * Creates a new `StructField` of type timestamp. * @since 1.3.0 */ def timestamp: StructField = StructField(name, TimestampType) /** - * Creates a new [[StructField]] of type binary. + * Creates a new `StructField` of type binary. * @since 1.3.0 */ def binary: StructField = StructField(name, BinaryType) /** - * Creates a new [[StructField]] of type array. + * Creates a new `StructField` of type array. * @since 1.3.0 */ def array(dataType: DataType): StructField = StructField(name, ArrayType(dataType)) /** - * Creates a new [[StructField]] of type map. + * Creates a new `StructField` of type map. * @since 1.3.0 */ def map(keyType: DataType, valueType: DataType): StructField = @@ -1289,13 +1289,13 @@ class ColumnName(name: String) extends Column(name) { def map(mapType: MapType): StructField = StructField(name, mapType) /** - * Creates a new [[StructField]] of type struct. + * Creates a new `StructField` of type struct. * @since 1.3.0 */ def struct(fields: StructField*): StructField = struct(StructType(fields)) /** - * Creates a new [[StructField]] of type struct. + * Creates a new `StructField` of type struct. * @since 1.3.0 */ def struct(structType: StructType): StructField = StructField(name, structType) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 0d43f09..184c5a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types._ /** - * Functionality for working with missing data in [[DataFrame]]s. + * Functionality for working with missing data in `DataFrame`s. * * @since 1.3.1 */ @@ -36,14 +36,14 @@ import org.apache.spark.sql.types._ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** - * Returns a new [[DataFrame]] that drops rows containing any null or NaN values. + * Returns a new `DataFrame` that drops rows containing any null or NaN values. * * @since 1.3.1 */ def drop(): DataFrame = drop("any", df.columns) /** - * Returns a new [[DataFrame]] that drops rows containing null or NaN values. + * Returns a new `DataFrame` that drops rows containing null or NaN values. * * If `how` is "any", then drop rows containing any null or NaN values. * If `how` is "all", then drop rows only if every column is null or NaN for that row. @@ -53,7 +53,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def drop(how: String): DataFrame = drop(how, df.columns) /** - * Returns a new [[DataFrame]] that drops rows containing any null or NaN values + * Returns a new `DataFrame` that drops rows containing any null or NaN values * in the specified columns. * * @since 1.3.1 @@ -61,7 +61,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def drop(cols: Array[String]): DataFrame = drop(cols.toSeq) /** - * (Scala-specific) Returns a new [[DataFrame]] that drops rows containing any null or NaN values + * (Scala-specific) Returns a new `DataFrame` that drops rows containing any null or NaN values * in the specified columns. 
* * @since 1.3.1 @@ -69,7 +69,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def drop(cols: Seq[String]): DataFrame = drop(cols.size, cols) /** - * Returns a new [[DataFrame]] that drops rows containing null or NaN values + * Returns a new `DataFrame` that drops rows containing null or NaN values * in the specified columns. * * If `how` is "any", then drop rows containing any null or NaN values in the specified columns. @@ -80,7 +80,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def drop(how: String, cols: Array[String]): DataFrame = drop(how, cols.toSeq) /** - * (Scala-specific) Returns a new [[DataFrame]] that drops rows containing null or NaN values + * (Scala-specific) Returns a new `DataFrame` that drops rows containing null or NaN values * in the specified columns. * * If `how` is "any", then drop rows containing any null or NaN values in the specified columns. @@ -97,7 +97,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { } /** - * Returns a new [[DataFrame]] that drops rows containing + * Returns a new `DataFrame` that drops rows containing * less than `minNonNulls` non-null and non-NaN values. * * @since 1.3.1 @@ -105,7 +105,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def drop(minNonNulls: Int): DataFrame = drop(minNonNulls, df.columns) /** - * Returns a new [[DataFrame]] that drops rows containing + * Returns a new `DataFrame` that drops rows containing * less than `minNonNulls` non-null and non-NaN values in the specified columns. * * @since 1.3.1 @@ -113,7 +113,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def drop(minNonNulls: Int, cols: Array[String]): DataFrame = drop(minNonNulls, cols.toSeq) /** - * (Scala-specific) Returns a new [[DataFrame]] that drops rows containing less than + * (Scala-specific) Returns a new `DataFrame` that drops rows containing less than * `minNonNulls` non-null and non-NaN values in the specified columns. * * @since 1.3.1 @@ -126,21 +126,21 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { } /** - * Returns a new [[DataFrame]] that replaces null or NaN values in numeric columns with `value`. + * Returns a new `DataFrame` that replaces null or NaN values in numeric columns with `value`. * * @since 1.3.1 */ def fill(value: Double): DataFrame = fill(value, df.columns) /** - * Returns a new [[DataFrame]] that replaces null values in string columns with `value`. + * Returns a new `DataFrame` that replaces null values in string columns with `value`. * * @since 1.3.1 */ def fill(value: String): DataFrame = fill(value, df.columns) /** - * Returns a new [[DataFrame]] that replaces null or NaN values in specified numeric columns. + * Returns a new `DataFrame` that replaces null or NaN values in specified numeric columns. * If a specified column is not a numeric column, it is ignored. * * @since 1.3.1 @@ -148,7 +148,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def fill(value: Double, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** - * (Scala-specific) Returns a new [[DataFrame]] that replaces null or NaN values in specified + * (Scala-specific) Returns a new `DataFrame` that replaces null or NaN values in specified * numeric columns. If a specified column is not a numeric column, it is ignored. * * @since 1.3.1 @@ -167,7 +167,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { } /** - * Returns a new [[DataFrame]] that replaces null values in specified string columns. 
+ * Returns a new `DataFrame` that replaces null values in specified string columns. * If a specified column is not a string column, it is ignored. * * @since 1.3.1 @@ -175,7 +175,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def fill(value: String, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** - * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in + * (Scala-specific) Returns a new `DataFrame` that replaces null values in * specified string columns. If a specified column is not a string column, it is ignored. * * @since 1.3.1 @@ -194,7 +194,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { } /** - * Returns a new [[DataFrame]] that replaces null values. + * Returns a new `DataFrame` that replaces null values. * * The key of the map is the column name, and the value of the map is the replacement value. * The value must be of the following type: @@ -213,7 +213,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { def fill(valueMap: java.util.Map[String, Any]): DataFrame = fill0(valueMap.asScala.toSeq) /** - * (Scala-specific) Returns a new [[DataFrame]] that replaces null values. + * (Scala-specific) Returns a new `DataFrame` that replaces null values. * * The key of the map is the column name, and the value of the map is the replacement value. * The value must be of the following type: `Int`, `Long`, `Float`, `Double`, `String`, `Boolean`. http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 5be9a99..1af2f9a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.types.StructType /** * Interface used to load a [[Dataset]] from external storage systems (e.g. file systems, - * key-value stores, etc). Use [[SparkSession.read]] to access this. + * key-value stores, etc). Use `SparkSession.read` to access this. * * @since 1.4.0 */ @@ -116,7 +116,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads input in as a [[DataFrame]], for data sources that don't require a path (e.g. external + * Loads input in as a `DataFrame`, for data sources that don't require a path (e.g. external * key-value stores). * * @since 1.4.0 @@ -126,7 +126,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by + * Loads input in as a `DataFrame`, for data sources that require a path (e.g. data backed by * a local or distributed file system). * * @since 1.4.0 @@ -136,7 +136,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads input in as a [[DataFrame]], for data sources that support multiple paths. + * Loads input in as a `DataFrame`, for data sources that support multiple paths. * Only works if the source is a HadoopFsRelationProvider. 
* * @since 1.6.0 @@ -153,7 +153,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * Construct a `DataFrame` representing the database table accessible via JDBC URL * url named table and connection properties. * * @since 1.4.0 @@ -163,7 +163,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * Construct a `DataFrame` representing the database table accessible via JDBC URL * url named table. Partitions of the table will be retrieved in parallel based on the parameters * passed to this function. * @@ -198,10 +198,10 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * Construct a `DataFrame` representing the database table accessible via JDBC URL * url named table using connection properties. The `predicates` parameter gives a list * expressions suitable for inclusion in WHERE clauses; each one defines one partition - * of the [[DataFrame]]. + * of the `DataFrame`. * * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash * your external database systems. @@ -240,7 +240,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { /** * Loads a JSON file (<a href="http://jsonlines.org/">JSON Lines text format or - * newline-delimited JSON</a>) and returns the result as a [[DataFrame]]. + * newline-delimited JSON</a>) and returns the result as a `DataFrame`. * See the documentation on the overloaded `json()` method with varargs for more details. * * @since 1.4.0 @@ -252,7 +252,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { /** * Loads a JSON file (<a href="http://jsonlines.org/">JSON Lines text format or - * newline-delimited JSON</a>) and returns the result as a [[DataFrame]]. + * newline-delimited JSON</a>) and returns the result as a `DataFrame`. * * This function goes through the input once to determine the input schema. If you know the * schema in advance, use the version that specifies the schema to avoid the extra scan. @@ -299,7 +299,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { /** * Loads a `JavaRDD[String]` storing JSON objects (<a href="http://jsonlines.org/">JSON * Lines text format or newline-delimited JSON</a>) and returns the result as - * a [[DataFrame]]. + * a `DataFrame`. * * Unless the schema is specified using [[schema]] function, this function goes through the * input once to determine the input schema. @@ -311,7 +311,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { /** * Loads an `RDD[String]` storing JSON objects (<a href="http://jsonlines.org/">JSON Lines - * text format or newline-delimited JSON</a>) and returns the result as a [[DataFrame]]. + * text format or newline-delimited JSON</a>) and returns the result as a `DataFrame`. * * Unless the schema is specified using [[schema]] function, this function goes through the * input once to determine the input schema. @@ -341,7 +341,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads a CSV file and returns the result as a [[DataFrame]]. 
See the documentation on the + * Loads a CSV file and returns the result as a `DataFrame`. See the documentation on the * other overloaded `csv()` method for more details. * * @since 2.0.0 @@ -352,7 +352,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads a CSV file and returns the result as a [[DataFrame]]. + * Loads a CSV file and returns the result as a `DataFrame`. * * This function will go through the input once to determine the input schema if `inferSchema` * is enabled. To avoid going through the entire data once, disable `inferSchema` option or @@ -392,7 +392,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSZZ`): sets the string that * indicates a timestamp format. Custom date formats follow the formats at * `java.text.SimpleDateFormat`. This applies to timestamp type.</li> - * `java.sql.Timestamp.valueOf()` and `java.sql.Date.valueOf()` or ISO 8601 format.</li> * <li>`maxColumns` (default `20480`): defines a hard limit of how many columns * a record can have.</li> * <li>`maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed @@ -415,7 +414,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { def csv(paths: String*): DataFrame = format("csv").load(paths : _*) /** - * Loads a Parquet file, returning the result as a [[DataFrame]]. See the documentation + * Loads a Parquet file, returning the result as a `DataFrame`. See the documentation * on the other overloaded `parquet()` method for more details. * * @since 2.0.0 @@ -426,7 +425,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads a Parquet file, returning the result as a [[DataFrame]]. + * Loads a Parquet file, returning the result as a `DataFrame`. * * You can set the following Parquet-specific option(s) for reading Parquet files: * <ul> @@ -442,7 +441,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads an ORC file and returns the result as a [[DataFrame]]. + * Loads an ORC file and returns the result as a `DataFrame`. * * @param path input path * @since 1.5.0 @@ -454,7 +453,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads an ORC file and returns the result as a [[DataFrame]]. + * Loads an ORC file and returns the result as a `DataFrame`. * * @param paths input paths * @since 2.0.0 @@ -464,7 +463,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { def orc(paths: String*): DataFrame = format("orc").load(paths: _*) /** - * Returns the specified table as a [[DataFrame]]. + * Returns the specified table as a `DataFrame`. * * @since 1.4.0 */ @@ -475,7 +474,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads text files and returns a [[DataFrame]] whose schema starts with a string column named + * Loads text files and returns a `DataFrame` whose schema starts with a string column named * "value", and followed by partitioned columns if there are any. See the documentation on * the other overloaded `text()` method for more details. 
* @@ -487,7 +486,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads text files and returns a [[DataFrame]] whose schema starts with a string column named + * Loads text files and returns a `DataFrame` whose schema starts with a string column named * "value", and followed by partitioned columns if there are any. * * Each line in the text files is a new row in the resulting DataFrame. For example: http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index a9a861c..89c3a74 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.util.sketch.{BloomFilter, CountMinSketch} /** - * Statistic functions for [[DataFrame]]s. + * Statistic functions for `DataFrame`s. * * @since 1.4.0 */ @@ -44,7 +44,9 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * of `x` is close to (p * N). * More precisely, * - * floor((p - err) * N) <= rank(x) <= ceil((p + err) * N). + * {{{ + * floor((p - err) * N) <= rank(x) <= ceil((p + err) * N) + * }}} * * This method implements a variation of the Greenwald-Khanna algorithm (with some speed * optimizations). @@ -55,7 +57,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param probabilities a list of quantile probabilities * Each number must belong to [0, 1]. * For example 0 is the minimum, 0.5 is the median, 1 is the maximum. - * @param relativeError The relative target precision to achieve (>= 0). + * @param relativeError The relative target precision to achieve (greater or equal to 0). * If set to zero, the exact quantiles are computed, which could be very expensive. * Note that values greater than 1 are accepted but give the same result as 1. * @return the approximate quantiles at the given probabilities @@ -189,7 +191,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * The `support` should be greater than 1e-4. * * This function is meant for exploratory data analysis, as we make no guarantee about the - * backward compatibility of the schema of the resulting [[DataFrame]]. + * backward compatibility of the schema of the resulting `DataFrame`. * * @param cols the names of the columns to search frequent items in. * @param support The minimum frequency for an item to be considered `frequent`. Should be greater @@ -236,7 +238,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * Uses a `default` support of 1%. * * This function is meant for exploratory data analysis, as we make no guarantee about the - * backward compatibility of the schema of the resulting [[DataFrame]]. + * backward compatibility of the schema of the resulting `DataFrame`. * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. @@ -254,7 +256,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * and Papadimitriou. * * This function is meant for exploratory data analysis, as we make no guarantee about the - * backward compatibility of the schema of the resulting [[DataFrame]]. 
+ * backward compatibility of the schema of the resulting `DataFrame`. * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. @@ -299,7 +301,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * Uses a `default` support of 1%. * * This function is meant for exploratory data analysis, as we make no guarantee about the - * backward compatibility of the schema of the resulting [[DataFrame]]. + * backward compatibility of the schema of the resulting `DataFrame`. * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. @@ -317,7 +319,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * its fraction as zero. * @param seed random seed * @tparam T stratum type - * @return a new [[DataFrame]] that represents the stratified sample + * @return a new `DataFrame` that represents the stratified sample * * {{{ * val df = spark.createDataFrame(Seq((1, 1), (1, 2), (2, 1), (2, 1), (2, 3), (3, 2), @@ -354,7 +356,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * its fraction as zero. * @param seed random seed * @tparam T stratum type - * @return a new [[DataFrame]] that represents the stratified sample + * @return a new `DataFrame` that represents the stratified sample * * @since 1.5.0 */ @@ -369,7 +371,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param depth depth of the sketch * @param width width of the sketch * @param seed random seed - * @return a [[CountMinSketch]] over column `colName` + * @return a `CountMinSketch` over column `colName` * @since 2.0.0 */ def countMinSketch(colName: String, depth: Int, width: Int, seed: Int): CountMinSketch = { @@ -383,7 +385,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param eps relative error of the sketch * @param confidence confidence of the sketch * @param seed random seed - * @return a [[CountMinSketch]] over column `colName` + * @return a `CountMinSketch` over column `colName` * @since 2.0.0 */ def countMinSketch( @@ -398,7 +400,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param depth depth of the sketch * @param width width of the sketch * @param seed random seed - * @return a [[CountMinSketch]] over column `colName` + * @return a `CountMinSketch` over column `colName` * @since 2.0.0 */ def countMinSketch(col: Column, depth: Int, width: Int, seed: Int): CountMinSketch = { @@ -412,7 +414,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param eps relative error of the sketch * @param confidence confidence of the sketch * @param seed random seed - * @return a [[CountMinSketch]] over column `colName` + * @return a `CountMinSketch` over column `colName` * @since 2.0.0 */ def countMinSketch(col: Column, eps: Double, confidence: Double, seed: Int): CountMinSketch = { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 8294e41..fa8e8cb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -32,7 +32,7 @@ import 
org.apache.spark.sql.types.StructType /** * Interface used to write a [[Dataset]] to external storage systems (e.g. file systems, - * key-value stores, etc). Use [[Dataset.write]] to access this. + * key-value stores, etc). Use `Dataset.write` to access this. * * @since 1.4.0 */ @@ -189,7 +189,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] at the specified path. + * Saves the content of the `DataFrame` at the specified path. * * @since 1.4.0 */ @@ -199,7 +199,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] as the specified table. + * Saves the content of the `DataFrame` as the specified table. * * @since 1.4.0 */ @@ -215,8 +215,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { dataSource.write(mode, df) } /** - * Inserts the content of the [[DataFrame]] to the specified table. It requires that - * the schema of the [[DataFrame]] is the same as the schema of the table. + * Inserts the content of the `DataFrame` to the specified table. It requires that + * the schema of the `DataFrame` is the same as the schema of the table. * * @note Unlike `saveAsTable`, `insertInto` ignores the column names and just uses position-based * resolution. For example: @@ -322,15 +322,15 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] as the specified table. + * Saves the content of the `DataFrame` as the specified table. * * In the case the table already exists, behavior of this function depends on the * save mode, specified by the `mode` function (default to throwing an exception). - * When `mode` is `Overwrite`, the schema of the [[DataFrame]] does not need to be + * When `mode` is `Overwrite`, the schema of the `DataFrame` does not need to be * the same as that of the existing table. * * When `mode` is `Append`, if there is an existing table, we will use the format and options of - * the existing table. The column order in the schema of the [[DataFrame]] doesn't need to be same + * the existing table. The column order in the schema of the `DataFrame` doesn't need to be same * as that of the existing table. Unlike `insertInto`, `saveAsTable` will use the column names to * find the correct column positions. For example: * @@ -346,7 +346,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * +---+---+ * }}} * - * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input + * When the DataFrame is created from a non-partitioned `HadoopFsRelation` with a single input * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC * and Parquet), the table is persisted in a Hive compatible format, which means other systems * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL @@ -406,7 +406,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] to an external database table via JDBC. In the case the + * Saves the content of the `DataFrame` to an external database table via JDBC. In the case the * table already exists in the external database, behavior of this function depends on the * save mode, specified by the `mode` function (default to throwing an exception). 
* @@ -447,7 +447,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] in JSON format (<a href="http://jsonlines.org/"> + * Saves the content of the `DataFrame` in JSON format (<a href="http://jsonlines.org/"> * JSON Lines text format or newline-delimited JSON</a>) at the specified path. * This is equivalent to: * {{{ @@ -474,7 +474,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] in Parquet format at the specified path. + * Saves the content of the `DataFrame` in Parquet format at the specified path. * This is equivalent to: * {{{ * format("parquet").save(path) @@ -495,7 +495,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] in ORC format at the specified path. + * Saves the content of the `DataFrame` in ORC format at the specified path. * This is equivalent to: * {{{ * format("orc").save(path) @@ -516,7 +516,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] in a text file at the specified path. + * Saves the content of the `DataFrame` in a text file at the specified path. * The DataFrame must have only one column that is of string type. * Each row becomes a new line in the output file. For example: * {{{ @@ -541,7 +541,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } /** - * Saves the content of the [[DataFrame]] in CSV format at the specified path. + * Saves the content of the `DataFrame` in CSV format at the specified path. * This is equivalent to: * {{{ * format("csv").save(path) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 7ba6ffc..fcc02e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -68,7 +68,7 @@ private[sql] object Dataset { /** * A Dataset is a strongly typed collection of domain-specific objects that can be transformed * in parallel using functional or relational operations. Each Dataset also has an untyped view - * called a [[DataFrame]], which is a Dataset of [[Row]]. + * called a `DataFrame`, which is a Dataset of [[Row]]. * * Operations available on Datasets are divided into transformations and actions. Transformations * are the ones that produce new Datasets, and actions are the ones that trigger computation and @@ -363,7 +363,7 @@ class Dataset[T] private[sql]( * - When `U` is a tuple, the columns will be be mapped by ordinal (i.e. the first column will * be assigned to `_1`). * - When `U` is a primitive type (i.e. String, Int, etc), then the first column of the - * [[DataFrame]] will be used. + * `DataFrame` will be used. * * If the schema of the Dataset does not match the desired `U` type, you can use `select` * along with `alias` or `as` to rearrange or rename as required. @@ -377,7 +377,7 @@ class Dataset[T] private[sql]( /** * Converts this strongly typed collection of data to generic `DataFrame` with columns renamed. - * This can be quite convenient in conversion from an RDD of tuples into a [[DataFrame]] with + * This can be quite convenient in conversion from an RDD of tuples into a `DataFrame` with * meaningful names. 
For example: * {{{ * val rdd: RDD[(Int, String)] = ... @@ -472,8 +472,8 @@ class Dataset[T] private[sql]( /** * Returns true if this Dataset contains one or more sources that continuously * return data as it arrives. A Dataset that reads data from a streaming source - * must be executed as a [[StreamingQuery]] using the `start()` method in - * [[DataStreamWriter]]. Methods that return a single answer, e.g. `count()` or + * must be executed as a `StreamingQuery` using the `start()` method in + * `DataStreamWriter`. Methods that return a single answer, e.g. `count()` or * `collect()`, will throw an [[AnalysisException]] when there is a streaming * source present. * @@ -685,7 +685,7 @@ class Dataset[T] private[sql]( def stat: DataFrameStatFunctions = new DataFrameStatFunctions(toDF()) /** - * Join with another [[DataFrame]]. + * Join with another `DataFrame`. * * Behaves as an INNER JOIN and requires a subsequent join predicate. * @@ -699,7 +699,7 @@ class Dataset[T] private[sql]( } /** - * Inner equi-join with another [[DataFrame]] using the given column. + * Inner equi-join with another `DataFrame` using the given column. * * Different from other join functions, the join column will only appear once in the output, * i.e. similar to SQL's `JOIN USING` syntax. @@ -713,7 +713,7 @@ class Dataset[T] private[sql]( * @param usingColumn Name of the column to join on. This column must exist on both sides. * * @note If you perform a self-join using this function without aliasing the input - * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since + * `DataFrame`s, you will NOT be able to reference any columns after the join, since * there is no way to disambiguate which side of the join you would like to reference. * * @group untypedrel @@ -724,7 +724,7 @@ class Dataset[T] private[sql]( } /** - * Inner equi-join with another [[DataFrame]] using the given columns. + * Inner equi-join with another `DataFrame` using the given columns. * * Different from other join functions, the join columns will only appear once in the output, * i.e. similar to SQL's `JOIN USING` syntax. @@ -738,7 +738,7 @@ class Dataset[T] private[sql]( * @param usingColumns Names of the columns to join on. This columns must exist on both sides. * * @note If you perform a self-join using this function without aliasing the input - * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since + * `DataFrame`s, you will NOT be able to reference any columns after the join, since * there is no way to disambiguate which side of the join you would like to reference. * * @group untypedrel @@ -749,7 +749,7 @@ class Dataset[T] private[sql]( } /** - * Equi-join with another [[DataFrame]] using the given columns. + * Equi-join with another `DataFrame` using the given columns. * * Different from other join functions, the join columns will only appear once in the output, * i.e. similar to SQL's `JOIN USING` syntax. @@ -759,7 +759,7 @@ class Dataset[T] private[sql]( * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. * * @note If you perform a self-join using this function without aliasing the input - * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since + * `DataFrame`s, you will NOT be able to reference any columns after the join, since * there is no way to disambiguate which side of the join you would like to reference. 
* * @group untypedrel @@ -782,7 +782,7 @@ class Dataset[T] private[sql]( } /** - * Inner join with another [[DataFrame]], using the given join expression. + * Inner join with another `DataFrame`, using the given join expression. * * {{{ * // The following two are equivalent: @@ -796,7 +796,7 @@ class Dataset[T] private[sql]( def join(right: Dataset[_], joinExprs: Column): DataFrame = join(right, joinExprs, "inner") /** - * Join with another [[DataFrame]], using the given join expression. The following performs + * Join with another `DataFrame`, using the given join expression. The following performs * a full outer join between `df1` and `df2`. * * {{{ @@ -860,7 +860,7 @@ class Dataset[T] private[sql]( } /** - * Explicit cartesian join with another [[DataFrame]]. + * Explicit cartesian join with another `DataFrame`. * * @param right Right side of the join operation. * @@ -875,7 +875,7 @@ class Dataset[T] private[sql]( /** * :: Experimental :: - * Joins this Dataset returning a [[Tuple2]] for each pair where `condition` evaluates to + * Joins this Dataset returning a `Tuple2` for each pair where `condition` evaluates to * true. * * This is similar to the relation `join` function with one important difference in the @@ -956,7 +956,7 @@ class Dataset[T] private[sql]( /** * :: Experimental :: - * Using inner equi-join to join this Dataset returning a [[Tuple2]] for each pair + * Using inner equi-join to join this Dataset returning a `Tuple2` for each pair * where `condition` evaluates to true. * * @param other Right side of the join. @@ -2232,7 +2232,7 @@ class Dataset[T] private[sql]( } /** - * Returns a new [[DataFrame]] that contains the result of applying a serialized R function + * Returns a new `DataFrame` that contains the result of applying a serialized R function * `func` to each partition. */ private[sql] def mapPartitionsInR( @@ -2446,7 +2446,7 @@ class Dataset[T] private[sql]( /** * Returns a new Dataset that has exactly `numPartitions` partitions. - * Similar to coalesce defined on an [[RDD]], this operation results in a narrow dependency, e.g. + * Similar to coalesce defined on an `RDD`, this operation results in a narrow dependency, e.g. * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of * the 100 new partitions will claim 10 of the current partitions. * @@ -2536,7 +2536,7 @@ class Dataset[T] private[sql]( def unpersist(): this.type = unpersist(blocking = false) /** - * Represents the content of the Dataset as an [[RDD]] of [[T]]. + * Represents the content of the Dataset as an `RDD` of [[T]]. * * @group basic * @since 1.6.0 @@ -2550,14 +2550,14 @@ class Dataset[T] private[sql]( } /** - * Returns the content of the Dataset as a [[JavaRDD]] of [[T]]s. + * Returns the content of the Dataset as a `JavaRDD` of [[T]]s. * @group basic * @since 1.6.0 */ def toJavaRDD: JavaRDD[T] = rdd.toJavaRDD() /** - * Returns the content of the Dataset as a [[JavaRDD]] of [[T]]s. + * Returns the content of the Dataset as a `JavaRDD` of [[T]]s. 
* @group basic * @since 1.6.0 */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala index 1163035..b94ad59 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala @@ -18,11 +18,10 @@ package org.apache.spark.sql import org.apache.spark.annotation.{Experimental, InterfaceStability} -import org.apache.spark.sql.streaming.StreamingQuery /** * :: Experimental :: - * A class to consume data generated by a [[StreamingQuery]]. Typically this is used to send the + * A class to consume data generated by a `StreamingQuery`. Typically this is used to send the * generated data to external systems. Each partition will use a new deserialized instance, so you * usually should do all the initialization (e.g. opening a connection or initiating a transaction) * in the `open` method. http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index 31ce8eb..395d709 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -131,7 +131,7 @@ class KeyValueGroupedDataset[K, V] private[sql]( * This function does not support partial aggregation, and as a result requires shuffling all * the data in the [[Dataset]]. If an application intends to perform an aggregation over each * key, it is best to use the reduce function or an - * [[org.apache.spark.sql.expressions#Aggregator Aggregator]]. + * `org.apache.spark.sql.expressions#Aggregator`. * * Internally, the implementation will spill to disk if any given group is too large to fit into * memory. However, users must take care to avoid materializing the whole iterator for a group @@ -160,7 +160,7 @@ class KeyValueGroupedDataset[K, V] private[sql]( * This function does not support partial aggregation, and as a result requires shuffling all * the data in the [[Dataset]]. If an application intends to perform an aggregation over each * key, it is best to use the reduce function or an - * [[org.apache.spark.sql.expressions#Aggregator Aggregator]]. + * `org.apache.spark.sql.expressions#Aggregator`. * * Internally, the implementation will spill to disk if any given group is too large to fit into * memory. However, users must take care to avoid materializing the whole iterator for a group @@ -182,7 +182,7 @@ class KeyValueGroupedDataset[K, V] private[sql]( * This function does not support partial aggregation, and as a result requires shuffling all * the data in the [[Dataset]]. If an application intends to perform an aggregation over each * key, it is best to use the reduce function or an - * [[org.apache.spark.sql.expressions#Aggregator Aggregator]]. + * `org.apache.spark.sql.expressions#Aggregator`. * * Internally, the implementation will spill to disk if any given group is too large to fit into * memory. 
However, users must take care to avoid materializing the whole iterator for a group @@ -205,7 +205,7 @@ class KeyValueGroupedDataset[K, V] private[sql]( * This function does not support partial aggregation, and as a result requires shuffling all * the data in the [[Dataset]]. If an application intends to perform an aggregation over each * key, it is best to use the reduce function or an - * [[org.apache.spark.sql.expressions#Aggregator Aggregator]]. + * `org.apache.spark.sql.expressions#Aggregator`. * * Internally, the implementation will spill to disk if any given group is too large to fit into * memory. However, users must take care to avoid materializing the whole iterator for a group http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index f019d1e..0fe8d87 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.NumericType import org.apache.spark.sql.types.StructType /** - * A set of methods for aggregations on a [[DataFrame]], created by [[Dataset.groupBy]]. + * A set of methods for aggregations on a `DataFrame`, created by `Dataset.groupBy`. * * The main method is the agg function, which has multiple variants. This class also contains * convenience some first order statistics such as mean, sum for convenience. @@ -129,7 +129,7 @@ class RelationalGroupedDataset protected[sql]( /** * (Scala-specific) Compute aggregates by specifying the column names and - * aggregate methods. The resulting [[DataFrame]] will also contain the grouping columns. + * aggregate methods. The resulting `DataFrame` will also contain the grouping columns. * * The available aggregate methods are `avg`, `max`, `min`, `sum`, `count`. * {{{ @@ -150,7 +150,7 @@ class RelationalGroupedDataset protected[sql]( /** * (Scala-specific) Compute aggregates by specifying a map from column name to - * aggregate methods. The resulting [[DataFrame]] will also contain the grouping columns. + * aggregate methods. The resulting `DataFrame` will also contain the grouping columns. * * The available aggregate methods are `avg`, `max`, `min`, `sum`, `count`. * {{{ @@ -171,7 +171,7 @@ class RelationalGroupedDataset protected[sql]( /** * (Java-specific) Compute aggregates by specifying a map from column name to - * aggregate methods. The resulting [[DataFrame]] will also contain the grouping columns. + * aggregate methods. The resulting `DataFrame` will also contain the grouping columns. * * The available aggregate methods are `avg`, `max`, `min`, `sum`, `count`. * {{{ @@ -228,7 +228,7 @@ class RelationalGroupedDataset protected[sql]( /** * Count the number of rows for each group. - * The resulting [[DataFrame]] will also contain the grouping columns. + * The resulting `DataFrame` will also contain the grouping columns. * * @since 1.3.0 */ @@ -236,7 +236,7 @@ class RelationalGroupedDataset protected[sql]( /** * Compute the average value for each numeric columns for each group. This is an alias for `avg`. - * The resulting [[DataFrame]] will also contain the grouping columns. + * The resulting `DataFrame` will also contain the grouping columns. 
* When specified columns are given, only compute the average values for them. * * @since 1.3.0 @@ -248,7 +248,7 @@ class RelationalGroupedDataset protected[sql]( /** * Compute the max value for each numeric columns for each group. - * The resulting [[DataFrame]] will also contain the grouping columns. + * The resulting `DataFrame` will also contain the grouping columns. * When specified columns are given, only compute the max values for them. * * @since 1.3.0 @@ -260,7 +260,7 @@ class RelationalGroupedDataset protected[sql]( /** * Compute the mean value for each numeric columns for each group. - * The resulting [[DataFrame]] will also contain the grouping columns. + * The resulting `DataFrame` will also contain the grouping columns. * When specified columns are given, only compute the mean values for them. * * @since 1.3.0 @@ -272,7 +272,7 @@ class RelationalGroupedDataset protected[sql]( /** * Compute the min value for each numeric column for each group. - * The resulting [[DataFrame]] will also contain the grouping columns. + * The resulting `DataFrame` will also contain the grouping columns. * When specified columns are given, only compute the min values for them. * * @since 1.3.0 @@ -284,7 +284,7 @@ class RelationalGroupedDataset protected[sql]( /** * Compute the sum for each numeric columns for each group. - * The resulting [[DataFrame]] will also contain the grouping columns. + * The resulting `DataFrame` will also contain the grouping columns. * When specified columns are given, only compute the sum for them. * * @since 1.3.0 @@ -295,7 +295,7 @@ class RelationalGroupedDataset protected[sql]( } /** - * Pivots a column of the current [[DataFrame]] and perform the specified aggregation. + * Pivots a column of the current `DataFrame` and perform the specified aggregation. * There are two versions of pivot function: one that requires the caller to specify the list * of distinct values to pivot on, and one that does not. The latter is more concise but less * efficient, because Spark needs to first compute the list of distinct values internally. @@ -335,7 +335,7 @@ class RelationalGroupedDataset protected[sql]( } /** - * Pivots a column of the current [[DataFrame]] and perform the specified aggregation. + * Pivots a column of the current `DataFrame` and perform the specified aggregation. * There are two versions of pivot function: one that requires the caller to specify the list * of distinct values to pivot on, and one that does not. The latter is more concise but less * efficient, because Spark needs to first compute the list of distinct values internally. @@ -367,7 +367,7 @@ class RelationalGroupedDataset protected[sql]( } /** - * Pivots a column of the current [[DataFrame]] and perform the specified aggregation. + * Pivots a column of the current `DataFrame` and perform the specified aggregation. * There are two versions of pivot function: one that requires the caller to specify the list * of distinct values to pivot on, and one that does not. The latter is more concise but less * efficient, because Spark needs to first compute the list of distinct values internally. @@ -392,12 +392,12 @@ class RelationalGroupedDataset protected[sql]( * Applies the given serialized R function `func` to each group of data. For each unique group, * the function will be passed the group key and an iterator that contains all of the elements in * the group. The function can return an iterator containing elements of an arbitrary type which - * will be returned as a new [[DataFrame]]. 
+ * will be returned as a new `DataFrame`. * * This function does not support partial aggregation, and as a result requires shuffling all * the data in the [[Dataset]]. If an application intends to perform an aggregation over each * key, it is best to use the reduce function or an - * [[org.apache.spark.sql.expressions#Aggregator Aggregator]]. + * `org.apache.spark.sql.expressions#Aggregator`. * * Internally, the implementation will spill to disk if any given group is too large to fit into * memory. However, users must take care to avoid materializing the whole iterator for a group http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala index 9108d19..edfcd7d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} /** - * Runtime configuration interface for Spark. To access this, use [[SparkSession.conf]]. + * Runtime configuration interface for Spark. To access this, use `SparkSession.conf`. * * Options set here are automatically propagated to the Hadoop configuration during I/O. * @@ -65,7 +65,8 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { /** * Returns the value of Spark runtime configuration property for the given key. * - * @throws NoSuchElementException if the key is not set and does not have a default value + * @throws java.util.NoSuchElementException if the key is not set and does not have a default + * value * @since 2.0.0 */ @throws[NoSuchElementException]("if the key is not set") http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 858fa4c..6554359 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -84,7 +84,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * Returns a [[SQLContext]] as new session, with separated SQL configurations, temporary - * tables, registered functions, but sharing the same [[SparkContext]], cached data and + * tables, registered functions, but sharing the same `SparkContext`, cached data and * other things. * * @since 1.6.0 @@ -883,7 +883,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * Loads an JavaRDD<String> storing JSON objects (one object per record) and applies the given + * Loads an JavaRDD[String] storing JSON objects (one object per record) and applies the given * schema, returning the result as a `DataFrame`. 
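The `SparkSession.conf` accessor described above hands back this RuntimeConfig; a minimal sketch of the get/set pattern (the key used here is just a standard SQL option chosen for illustration):

{{{
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder.getOrCreate()

// Options set here are propagated to the Hadoop configuration during I/O.
spark.conf.set("spark.sql.shuffle.partitions", "64")
val n = spark.conf.get("spark.sql.shuffle.partitions")   // "64"

// A key that is neither set nor has a default makes `get` throw
// java.util.NoSuchElementException; `getOption` avoids that.
val maybe = spark.conf.getOption("some.unset.key")       // None
}}}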
* * @group specificdata http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 71b1880..08d74ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -93,7 +93,7 @@ class SparkSession private( * ----------------------- */ /** - * State shared across sessions, including the [[SparkContext]], cached data, listener, + * State shared across sessions, including the `SparkContext`, cached data, listener, * and a catalog that interacts with external systems. */ @transient @@ -125,7 +125,7 @@ class SparkSession private( * * This is the interface through which the user can get and set all Spark and Hadoop * configurations that are relevant to Spark SQL. When getting the value of a config, - * this defaults to the value set in the underlying [[SparkContext]], if any. + * this defaults to the value set in the underlying `SparkContext`, if any. * * @since 2.0.0 */ @@ -189,8 +189,8 @@ class SparkSession private( /** * :: Experimental :: - * Returns a [[StreamingQueryManager]] that allows managing all the - * [[StreamingQuery StreamingQueries]] active on `this`. + * Returns a `StreamingQueryManager` that allows managing all the + * `StreamingQuery`s active on `this`. * * @since 2.0.0 */ @@ -200,9 +200,9 @@ class SparkSession private( /** * Start a new session with isolated SQL configurations, temporary tables, registered - * functions are isolated, but sharing the underlying [[SparkContext]] and cached data. + * functions are isolated, but sharing the underlying `SparkContext` and cached data. * - * @note Other than the [[SparkContext]], all shared state is initialized lazily. + * @note Other than the `SparkContext`, all shared state is initialized lazily. * This method will force the initialization of the shared state to ensure that parent * and child sessions are set up with the same shared state. If the underlying catalog * implementation is Hive, this will initialize the metastore, which may take some time. @@ -219,7 +219,7 @@ class SparkSession private( * --------------------------------- */ /** - * Returns a [[DataFrame]] with no rows or columns. + * Returns a `DataFrame` with no rows or columns. * * @since 2.0.0 */ @@ -243,7 +243,7 @@ class SparkSession private( /** * :: Experimental :: - * Creates a [[DataFrame]] from an RDD of Product (e.g. case classes, tuples). + * Creates a `DataFrame` from an RDD of Product (e.g. case classes, tuples). * * @since 2.0.0 */ @@ -257,7 +257,7 @@ class SparkSession private( /** * :: Experimental :: - * Creates a [[DataFrame]] from a local Seq of Product. + * Creates a `DataFrame` from a local Seq of Product. * * @since 2.0.0 */ @@ -272,7 +272,7 @@ class SparkSession private( /** * :: DeveloperApi :: - * Creates a [[DataFrame]] from an [[RDD]] containing [[Row]]s using the given schema. + * Creates a `DataFrame` from an `RDD` containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided RDD matches * the provided schema. Otherwise, there will be runtime exception. 
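The `createDataFrame(rowRDD, schema)` overload documented above requires every Row to line up with the supplied schema; a minimal sketch, with a two-column schema and sample rows invented for illustration:

{{{
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val spark = SparkSession.builder.getOrCreate()

val schema = StructType(Seq(
  StructField("name", StringType, nullable = false),
  StructField("age", IntegerType, nullable = true)))

// Each Row must match the schema, otherwise a runtime exception is thrown.
val rows = spark.sparkContext.parallelize(Seq(Row("alice", 29), Row("bob", 31)))
spark.createDataFrame(rows, schema).show()
}}}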
* Example: @@ -309,7 +309,7 @@ class SparkSession private( /** * :: DeveloperApi :: - * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema. + * Creates a `DataFrame` from a `JavaRDD` containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided RDD matches * the provided schema. Otherwise, there will be runtime exception. * @@ -323,7 +323,7 @@ class SparkSession private( /** * :: DeveloperApi :: - * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema. + * Creates a `DataFrame` from a [[java.util.List]] containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided List matches * the provided schema. Otherwise, there will be runtime exception. * @@ -381,7 +381,7 @@ class SparkSession private( } /** - * Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]]. + * Convert a `BaseRelation` created for external data sources into a `DataFrame`. * * @since 2.0.0 */ @@ -470,7 +470,7 @@ class SparkSession private( /** * :: Experimental :: - * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements + * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements * in a range from 0 to `end` (exclusive) with step value 1. * * @since 2.0.0 @@ -481,7 +481,7 @@ class SparkSession private( /** * :: Experimental :: - * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements + * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with step value 1. * * @since 2.0.0 @@ -494,7 +494,7 @@ class SparkSession private( /** * :: Experimental :: - * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements + * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with a step value. * * @since 2.0.0 @@ -507,7 +507,7 @@ class SparkSession private( /** * :: Experimental :: - * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements + * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with a step value, with partition number * specified. * @@ -520,7 +520,7 @@ class SparkSession private( } /** - * Creates a [[DataFrame]] from an RDD[Row]. + * Creates a `DataFrame` from an RDD[Row]. * User can specify whether the input rows should be converted to Catalyst rows. */ private[sql] def internalCreateDataFrame( @@ -533,7 +533,7 @@ class SparkSession private( } /** - * Creates a [[DataFrame]] from an RDD[Row]. + * Creates a `DataFrame` from an RDD[Row]. * User can specify whether the input rows should be converted to Catalyst rows. */ private[sql] def createDataFrame( @@ -566,7 +566,7 @@ class SparkSession private( @transient lazy val catalog: Catalog = new CatalogImpl(self) /** - * Returns the specified table as a [[DataFrame]]. + * Returns the specified table as a `DataFrame`. * * @since 2.0.0 */ @@ -583,7 +583,7 @@ class SparkSession private( * ----------------- */ /** - * Executes a SQL query using Spark, returning the result as a [[DataFrame]]. + * Executes a SQL query using Spark, returning the result as a `DataFrame`. * The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect'. 
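The `range` overloads above all yield a Dataset with a single LongType column named `id`, with the end value exclusive; for instance:

{{{
val spark = org.apache.spark.sql.SparkSession.builder.getOrCreate()

spark.range(5).show()             // id: 0, 1, 2, 3, 4
spark.range(2, 6).show()          // id: 2, 3, 4, 5
spark.range(0, 10, 3).show()      // id: 0, 3, 6, 9
spark.range(0, 10, 2, 4).count()  // step 2 over 4 partitions -> 5 rows
}}}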
* * @since 2.0.0 @@ -594,7 +594,7 @@ class SparkSession private( /** * Returns a [[DataFrameReader]] that can be used to read non-streaming data in as a - * [[DataFrame]]. + * `DataFrame`. * {{{ * sparkSession.read.parquet("/path/to/file.parquet") * sparkSession.read.schema(schema).json("/path/to/file.json") @@ -606,7 +606,7 @@ class SparkSession private( /** * :: Experimental :: - * Returns a [[DataStreamReader]] that can be used to read streaming data in as a [[DataFrame]]. + * Returns a `DataStreamReader` that can be used to read streaming data in as a `DataFrame`. * {{{ * sparkSession.readStream.parquet("/path/to/directory/of/parquet/files") * sparkSession.readStream.schema(schema).json("/path/to/directory/of/json/files") @@ -624,7 +624,7 @@ class SparkSession private( /** * :: Experimental :: * (Scala-specific) Implicit methods available in Scala for converting - * common Scala objects into [[DataFrame]]s. + * common Scala objects into `DataFrame`s. * * {{{ * val sparkSession = SparkSession.builder.getOrCreate() @@ -641,7 +641,7 @@ class SparkSession private( // scalastyle:on /** - * Stop the underlying [[SparkContext]]. + * Stop the underlying `SparkContext`. * * @since 2.0.0 */ @@ -726,7 +726,7 @@ object SparkSession { /** * Sets a config option. Options set using this method are automatically propagated to - * both [[SparkConf]] and SparkSession's own configuration. + * both `SparkConf` and SparkSession's own configuration. * * @since 2.0.0 */ @@ -737,7 +737,7 @@ object SparkSession { /** * Sets a config option. Options set using this method are automatically propagated to - * both [[SparkConf]] and SparkSession's own configuration. + * both `SparkConf` and SparkSession's own configuration. * * @since 2.0.0 */ @@ -748,7 +748,7 @@ object SparkSession { /** * Sets a config option. Options set using this method are automatically propagated to - * both [[SparkConf]] and SparkSession's own configuration. + * both `SparkConf` and SparkSession's own configuration. * * @since 2.0.0 */ @@ -759,7 +759,7 @@ object SparkSession { /** * Sets a config option. Options set using this method are automatically propagated to - * both [[SparkConf]] and SparkSession's own configuration. + * both `SparkConf` and SparkSession's own configuration. * * @since 2.0.0 */ @@ -769,7 +769,7 @@ object SparkSession { } /** - * Sets a list of config options based on the given [[SparkConf]]. + * Sets a list of config options based on the given `SparkConf`. * * @since 2.0.0 */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 6043c5e..c8be89c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.types.{DataType, DataTypes} import org.apache.spark.util.Utils /** - * Functions for registering user-defined functions. Use [[SQLContext.udf]] to access this. + * Functions for registering user-defined functions. Use `SQLContext.udf` to access this. * * @note The user-defined functions must be deterministic. 
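The UDF registration entry point mentioned above is reached as `sqlContext.udf` (or `spark.udf` on a SparkSession); a minimal sketch of registering and calling a function, with the name `strLen` made up for the example:

{{{
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder.getOrCreate()

// Register a deterministic Scala function so it can be called from SQL.
spark.udf.register("strLen", (s: String) => s.length)

spark.sql("SELECT strLen('spark') AS n").show()   // n = 5
}}}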
* http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index d5940c6..650439a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -474,7 +474,9 @@ object functions { /** * Aggregate function: returns the level of grouping, equals to * - * (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn) + * {{{ + * (grouping(c1) <<; (n-1)) + (grouping(c2) <<; (n-2)) + ... + grouping(cn) + * }}} * * @note The list of columns should match with grouping columns exactly, or empty (means all the * grouping columns). @@ -487,7 +489,9 @@ object functions { /** * Aggregate function: returns the level of grouping, equals to * - * (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn) + * {{{ + * (grouping(c1) <<; (n-1)) + (grouping(c2) <<; (n-2)) + ... + grouping(cn) + * }}} * * @note The list of columns should match with grouping columns exactly. * @@ -1048,9 +1052,12 @@ object functions { * within each partition in the lower 33 bits. The assumption is that the data frame has * less than 1 billion partitions, and each partition has less than 8 billion records. * - * As an example, consider a [[DataFrame]] with two partitions, each with 3 records. + * As an example, consider a `DataFrame` with two partitions, each with 3 records. * This expression would return the following IDs: + * + * {{{ * 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. + * }}} * * @group normal_funcs * @since 1.4.0 @@ -1066,9 +1073,12 @@ object functions { * within each partition in the lower 33 bits. The assumption is that the data frame has * less than 1 billion partitions, and each partition has less than 8 billion records. * - * As an example, consider a [[DataFrame]] with two partitions, each with 3 records. + * As an example, consider a `DataFrame` with two partitions, each with 3 records. * This expression would return the following IDs: + * + * {{{ * 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. + * }}} * * @group normal_funcs * @since 1.6.0 @@ -1184,7 +1194,7 @@ object functions { /** * Creates a new struct column. - * If the input column is a column in a [[DataFrame]], or a derived column expression + * If the input column is a column in a `DataFrame`, or a derived column expression * that is named (i.e. aliased), its name would be remained as the StructField's name, * otherwise, the newly generated StructField's name would be auto generated as col${index + 1}, * i.e. col1, col2, col3, ... @@ -1846,8 +1856,8 @@ object functions { def round(e: Column): Column = round(e, 0) /** - * Round the value of `e` to `scale` decimal places if `scale` >= 0 - * or at integral part when `scale` < 0. + * Round the value of `e` to `scale` decimal places if `scale` is greater than or equal to 0 + * or at integral part when `scale` is less than 0. * * @group math_funcs * @since 1.5.0 @@ -1864,7 +1874,7 @@ object functions { /** * Round the value of `e` to `scale` decimal places with HALF_EVEN round mode - * if `scale` >= 0 or at integral part when `scale` < 0. + * if `scale` is greater than or equal to 0 or at integral part when `scale` is less than 0. 
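The `round` behaviour spelled out above (decimal places for a non-negative `scale`, rounding at the integral part for a negative one) can be seen with literal inputs; a small sketch against a one-row frame:

{{{
import org.apache.spark.sql.functions.{lit, round}

val spark = org.apache.spark.sql.SparkSession.builder.getOrCreate()
val one = spark.range(1)   // a one-row frame, just to evaluate the literals

one.select(round(lit(3.1415), 2)).show()    // scale >= 0: 3.14
one.select(round(lit(123.456), -2)).show()  // scale < 0: rounds at the hundreds place -> 100
}}}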
* * @group math_funcs * @since 2.0.0 @@ -2172,7 +2182,7 @@ object functions { * and returns the result as a string column. * * If d is 0, the result has no decimal point or fractional part. - * If d < 0, the result will be null. + * If d is less than 0, the result will be null. * * @group string_funcs * @since 1.5.0 @@ -2888,7 +2898,7 @@ object functions { } /** - * (Scala-specific) Parses a column containing a JSON string into a [[StructType]] with the + * (Scala-specific) Parses a column containing a JSON string into a `StructType` with the * specified schema. Returns `null`, in the case of an unparseable string. * * @param e a string column containing JSON data. @@ -2904,7 +2914,7 @@ object functions { } /** - * (Java-specific) Parses a column containing a JSON string into a [[StructType]] with the + * (Java-specific) Parses a column containing a JSON string into a `StructType` with the * specified schema. Returns `null`, in the case of an unparseable string. * * @param e a string column containing JSON data. @@ -2919,7 +2929,7 @@ object functions { from_json(e, schema, options.asScala.toMap) /** - * Parses a column containing a JSON string into a [[StructType]] with the specified schema. + * Parses a column containing a JSON string into a `StructType` with the specified schema. * Returns `null`, in the case of an unparseable string. * * @param e a string column containing JSON data. @@ -2932,7 +2942,7 @@ object functions { from_json(e, schema, Map.empty[String, String]) /** - * Parses a column containing a JSON string into a [[StructType]] with the specified schema. + * Parses a column containing a JSON string into a `StructType` with the specified schema. * Returns `null`, in the case of an unparseable string. * * @param e a string column containing JSON data. @@ -2946,7 +2956,7 @@ object functions { /** - * (Scala-specific) Converts a column containing a [[StructType]] into a JSON string with the + * (Scala-specific) Converts a column containing a `StructType` into a JSON string with the * specified schema. Throws an exception, in the case of an unsupported type. * * @param e a struct column. @@ -2961,7 +2971,7 @@ object functions { } /** - * (Java-specific) Converts a column containing a [[StructType]] into a JSON string with the + * (Java-specific) Converts a column containing a `StructType` into a JSON string with the * specified schema. Throws an exception, in the case of an unsupported type. * * @param e a struct column. @@ -2975,7 +2985,7 @@ object functions { to_json(e, options.asScala.toMap) /** - * Converts a column containing a [[StructType]] into a JSON string with the + * Converts a column containing a `StructType` into a JSON string with the * specified schema. Throws an exception, in the case of an unsupported type. * * @param e a struct column. http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index d3e323c..6d98462 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.types.StructType /** - * Internal implementation of the user-facing [[Catalog]]. + * Internal implementation of the user-facing `Catalog`. 
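The `from_json`/`to_json` pair documented above round-trips between JSON strings and struct columns; a minimal sketch, with the schema and sample strings invented for illustration:

{{{
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, from_json, to_json}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val spark = SparkSession.builder.getOrCreate()
import spark.implicits._

val schema = StructType(Seq(
  StructField("name", StringType),
  StructField("age", IntegerType)))

val df = Seq("""{"name":"alice","age":29}""", "not json").toDF("value")

// Unparseable strings yield null rather than failing the query.
val parsed = df.select(from_json(col("value"), schema).as("person"))
parsed.show(false)

// And back again: serialize the struct column to a JSON string.
parsed.select(to_json(col("person"))).show(false)
}}}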
*/ class CatalogImpl(sparkSession: SparkSession) extends Catalog { @@ -175,8 +175,8 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { } /** - * Get the database with the specified name. This throws an [[AnalysisException]] when no - * [[Database]] can be found. + * Get the database with the specified name. This throws an `AnalysisException` when no + * `Database` can be found. */ override def getDatabase(dbName: String): Database = { makeDatabase(dbName) @@ -184,7 +184,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { /** * Get the table or view with the specified name. This table can be a temporary view or a - * table/view in the current database. This throws an [[AnalysisException]] when no [[Table]] + * table/view in the current database. This throws an `AnalysisException` when no `Table` * can be found. */ override def getTable(tableName: String): Table = { @@ -193,7 +193,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { /** * Get the table or view with the specified name in the specified database. This throws an - * [[AnalysisException]] when no [[Table]] can be found. + * `AnalysisException` when no `Table` can be found. */ override def getTable(dbName: String, tableName: String): Table = { makeTable(TableIdentifier(tableName, Option(dbName))) @@ -201,7 +201,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { /** * Get the function with the specified name. This function can be a temporary function or a - * function in the current database. This throws an [[AnalysisException]] when no [[Function]] + * function in the current database. This throws an `AnalysisException` when no `Function` * can be found. */ override def getFunction(functionName: String): Function = { @@ -209,7 +209,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { } /** - * Get the function with the specified name. This returns [[None]] when no [[Function]] can be + * Get the function with the specified name. This returns `None` when no `Function` can be * found. */ override def getFunction(dbName: String, functionName: String): Function = { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala index 791a9cf..4e7c813 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala @@ -23,7 +23,7 @@ import org.apache.spark.internal.config._ * A helper class that enables substitution using syntax like * `${var}`, `${system:var}` and `${env:var}`. * - * Variable substitution is controlled by [[SQLConf.variableSubstituteEnabled]]. + * Variable substitution is controlled by `SQLConf.variableSubstituteEnabled`. */ class VariableSubstitution(conf: SQLConf) {
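The user-facing side of the `Catalog` covered in the CatalogImpl hunks above is reached through `spark.catalog`; a minimal sketch of the lookup behaviour (the view and database names are made up):

{{{
import org.apache.spark.sql.{AnalysisException, SparkSession}

val spark = SparkSession.builder.getOrCreate()
spark.range(3).createOrReplaceTempView("nums")

spark.catalog.listTables().show()
val table = spark.catalog.getTable("nums")   // temporary view in the current session

// Lookups that cannot be resolved throw AnalysisException.
try {
  spark.catalog.getDatabase("no_such_db")
} catch {
  case e: AnalysisException => println(s"not found: ${e.getMessage}")
}
}}}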
