yihua commented on code in PR #10167:
URL: https://github.com/apache/hudi/pull/10167#discussion_r1417895540
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala:
##########
@@ -25,7 +25,7 @@ import org.apache.hudi.cdc.{CDCFileGroupIterator,
CDCRelation, HoodieCDCFileGrou
import org.apache.hudi.common.config.TypedProperties
import org.apache.hudi.common.engine.HoodieReaderContext
import org.apache.hudi.common.fs.FSUtils
-import org.apache.hudi.common.model.{FileSlice, HoodieBaseFile,
HoodieFileGroupId, HoodieLogFile, HoodieRecord}
+import org.apache.hudi.common.model._
Review Comment:
Avoid wildcard (`_`) imports unless really necessary; prefer listing the imported names explicitly so readers can see exactly what is brought into scope.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala:
##########
@@ -355,12 +364,50 @@ class
HoodieFileGroupReaderBasedParquetFileFormat(tableState: HoodieTableState,
(baseFileReader, preMergeBaseFileReader, skeletonReader,
bootstrapBaseReader)
}
+}
+
+object HoodieFileGroupReaderBasedParquetFileFormat {
+ private val ROW_INDEX = "row_index"
+ private val ROW_INDEX_TEMPORARY_COLUMN_NAME = s"_tmp_metadata_$ROW_INDEX"
+ private val FILE_SOURCE_GENERATED_METADATA_COL_ATTR_KEY =
"__file_source_generated_metadata_col"
+ private val FILE_SOURCE_METADATA_COL_ATTR_KEY = "__file_source_metadata_col"
+ private val METADATA_COL_ATTR_KEY = "__metadata_col"
Review Comment:
Add documentation explaining where these constants come from (i.e., the
corresponding Spark classes and variables they mirror) and what each one means.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala:
##########
@@ -35,9 +35,10 @@ import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.JoinedRow
import org.apache.spark.sql.execution.datasources.PartitionedFile
+import
org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.{ROW_INDEX,
ROW_INDEX_TEMPORARY_COLUMN_NAME, getAppliedFilters, getAppliedRequiredSchema,
getLogFilesFromSlice, getRecordKeyRelatedFilters}
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.isMetaField
import org.apache.spark.sql.sources.Filter
-import org.apache.spark.sql.types.{StructField, StructType}
+import org.apache.spark.sql.types._
Review Comment:
Same comment as above regarding wildcard (`_`) imports: list the required types explicitly instead.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]