This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 54a6a6871c7e [SPARK-55479][SQL] Fix style issues in SparkShreddingUtils
54a6a6871c7e is described below
commit 54a6a6871c7ea73e1e70dc52d8b953ef5b24f996
Author: manuzhang <[email protected]>
AuthorDate: Sat Feb 14 13:46:34 2026 -0800
[SPARK-55479][SQL] Fix style issues in SparkShreddingUtils
### What changes were proposed in this pull request?
Fix style issues in the class SparkShreddingUtils.
### Why are the changes needed?
Keeping a consistent code style improves readability and reduces the risk of subtle bugs.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54262 from manuzhang/SPARK-55479.
Authored-by: manuzhang <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../datasources/parquet/SparkShreddingUtils.scala | 51 ++++++++++++----------
1 file changed, 27 insertions(+), 24 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala
index 0f78d740e0bf..0426b41c6b7a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala
@@ -446,11 +446,11 @@ private[this] final class ScalarReader(
}
case object SparkShreddingUtils {
- val VariantValueFieldName = "value";
- val TypedValueFieldName = "typed_value";
- val MetadataFieldName = "metadata";
+ private val VARIANT_VALUE_FIELD_NAME = "value"
+ private val TYPED_VALUE_FIELD_NAME = "typed_value"
+ private val METADATA_FIELD_NAME = "metadata"
- val VARIANT_WRITE_SHREDDING_KEY: String = "__VARIANT_WRITE_SHREDDING_KEY"
+ private val VARIANT_WRITE_SHREDDING_KEY: String = "__VARIANT_WRITE_SHREDDING_KEY"
def buildVariantSchema(schema: DataType): VariantSchema = {
schema match {
@@ -481,32 +481,34 @@ case object SparkShreddingUtils {
// Always set containsNull to false. One of value or typed_value must always be set for
// array elements.
val arrayShreddingSchema =
- ArrayType(variantShreddingSchema(elementType, false, false), containsNull = false)
+ ArrayType(variantShreddingSchema(elementType, isTopLevel = false,
+ isObjectField = false), containsNull = false)
Seq(
- StructField(VariantValueFieldName, BinaryType, nullable = true),
- StructField(TypedValueFieldName, arrayShreddingSchema, nullable = true)
+ StructField(VARIANT_VALUE_FIELD_NAME, BinaryType, nullable = true),
+ StructField(TYPED_VALUE_FIELD_NAME, arrayShreddingSchema, nullable = true)
)
case StructType(fields) =>
// The field name level is always non-nullable: Variant null values are represented in the
- // "value" columna as "00", and missing values are represented by setting both "value" and
+ // "value" column as "00", and missing values are represented by setting both "value" and
// "typed_value" to null.
val objectShreddingSchema = StructType(fields.map(f =>
- f.copy(dataType = variantShreddingSchema(f.dataType, false, true), nullable = false)))
+ f.copy(dataType = variantShreddingSchema(f.dataType, isTopLevel = false,
+ isObjectField = true), nullable = false)))
Seq(
- StructField(VariantValueFieldName, BinaryType, nullable = true),
- StructField(TypedValueFieldName, objectShreddingSchema, nullable = true)
+ StructField(VARIANT_VALUE_FIELD_NAME, BinaryType, nullable = true),
+ StructField(TYPED_VALUE_FIELD_NAME, objectShreddingSchema, nullable = true)
)
case VariantType =>
// For Variant, we don't need a typed column. If there is no typed column, value is required
// for array elements or top-level fields, but optional for objects (where a null represents
// a missing field).
Seq(
- StructField(VariantValueFieldName, BinaryType, nullable = isObjectField)
+ StructField(VARIANT_VALUE_FIELD_NAME, BinaryType, nullable = isObjectField)
)
case _: NumericType | BooleanType | _: StringType | BinaryType | _: DatetimeType =>
Seq(
- StructField(VariantValueFieldName, BinaryType, nullable = true),
- StructField(TypedValueFieldName, dataType, nullable = true)
+ StructField(VARIANT_VALUE_FIELD_NAME, BinaryType, nullable = true),
+ StructField(TYPED_VALUE_FIELD_NAME, dataType, nullable = true)
)
case _ =>
// No other types have a corresponding shreddings schema.
@@ -514,7 +516,7 @@ case object SparkShreddingUtils {
}
if (isTopLevel) {
- StructType(StructField(MetadataFieldName, BinaryType, nullable = false) +: fields)
+ StructType(StructField(METADATA_FIELD_NAME, BinaryType, nullable = false) +: fields)
} else {
StructType(fields)
}
@@ -560,7 +562,7 @@ case object SparkShreddingUtils {
}
schema.fields.zipWithIndex.foreach { case (f, i) =>
f.name match {
- case TypedValueFieldName =>
+ case TYPED_VALUE_FIELD_NAME =>
if (typedIdx != -1) {
throw QueryCompilationErrors.invalidVariantShreddingSchema(schema)
}
@@ -585,7 +587,7 @@ case object SparkShreddingUtils {
case s: StructType => arraySchema = buildVariantSchema(s, topLevel = false)
case _ => throw QueryCompilationErrors.invalidVariantShreddingSchema(schema)
}
- case t => scalarSchema = (t match {
+ case t => scalarSchema = t match {
case BooleanType => new VariantSchema.BooleanType
case ByteType => new VariantSchema.IntegralType(VariantSchema.IntegralSize.BYTE)
case ShortType => new VariantSchema.IntegralType(VariantSchema.IntegralSize.SHORT)
@@ -600,14 +602,14 @@ case object SparkShreddingUtils {
case TimestampNTZType => new VariantSchema.TimestampNTZType
case d: DecimalType => new VariantSchema.DecimalType(d.precision, d.scale)
case _ => throw QueryCompilationErrors.invalidVariantShreddingSchema(schema)
- })
+ }
}
- case VariantValueFieldName =>
+ case VARIANT_VALUE_FIELD_NAME =>
if (variantIdx != -1 || f.dataType != BinaryType) {
throw QueryCompilationErrors.invalidVariantShreddingSchema(schema)
}
variantIdx = i
- case MetadataFieldName =>
+ case METADATA_FIELD_NAME =>
if (topLevelMetadataIdx != -1 || f.dataType != BinaryType) {
throw QueryCompilationErrors.invalidVariantShreddingSchema(schema)
}
@@ -651,7 +653,8 @@ case object SparkShreddingUtils {
converter.convertField(column.getChild(0)).sparkType
}
- class SparkShreddedResult(schema: VariantSchema) extends VariantShreddingWriter.ShreddedResult {
+ private class SparkShreddedResult(schema: VariantSchema)
+ extends VariantShreddingWriter.ShreddedResult {
// Result is stored as an InternalRow.
val row = new GenericInternalRow(schema.numFields)
@@ -662,8 +665,8 @@ case object SparkShreddingUtils {
}
override def addObject(values: Array[VariantShreddingWriter.ShreddedResult]): Unit = {
- val innerRow = new GenericInternalRow(schema.objectSchema.size)
- for (i <- 0 until values.length) {
+ val innerRow = new GenericInternalRow(schema.objectSchema.length)
+ for (i <- values.indices) {
innerRow.update(i, values(i).asInstanceOf[SparkShreddedResult].row)
}
row.update(schema.typedIdx, innerRow)
@@ -688,7 +691,7 @@ case object SparkShreddingUtils {
}
}
- class SparkShreddedResultBuilder() extends VariantShreddingWriter.ShreddedResultBuilder {
+ private class SparkShreddedResultBuilder extends VariantShreddingWriter.ShreddedResultBuilder {
override def createEmpty(schema: VariantSchema): VariantShreddingWriter.ShreddedResult = {
new SparkShreddedResult(schema)
}
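
For readers skimming the diff: the three renamed constants name the columns of the Variant shredding layout this file builds. As a minimal sketch (an illustration, not code from this commit), the top-level schema that variantShreddingSchema would produce for a Variant shredded as LONG looks like the following, using only Spark's public org.apache.spark.sql.types API; the helper itself is private to SparkShreddingUtils, so treat the exact shape as an assumption:

    import org.apache.spark.sql.types._

    // Hypothetical shredding layout for a top-level Variant column whose typed
    // representation is LONG. Field names mirror the constants in the diff:
    //   metadata    - binary, non-nullable (added only at the top level)
    //   value       - binary, nullable fallback encoding of the variant value
    //   typed_value - the shredded scalar type, nullable
    val shreddedLongSchema: StructType = StructType(Seq(
      StructField("metadata", BinaryType, nullable = false),
      StructField("value", BinaryType, nullable = true),
      StructField("typed_value", LongType, nullable = true)
    ))

Per the comments in the diff, at most one of value and typed_value is non-null for a given element; for object fields, both set to null together represents a missing field.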
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]