dongjoon-hyun commented on a change in pull request #28761:
URL: https://github.com/apache/spark/pull/28761#discussion_r466607253
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala
##########
@@ -37,12 +40,44 @@ trait OrcFiltersBase {
}
/**
- * Return true if this is a searchable type in ORC.
- * Both CharType and VarcharType are cleaned at AstBuilder.
+ * This method returns a map which contains ORC field name and data type. Each key
+ * represents a column; `dots` are used as separators for nested columns. If any part
+ * of the names contains `dots`, it is quoted to avoid confusion. See
+ * `org.apache.spark.sql.connector.catalog.quote` for implementation details.
*/
- protected[sql] def isSearchableType(dataType: DataType) = dataType match {
- case BinaryType => false
- case _: AtomicType => true
- case _ => false
+ protected[sql] def getNameToOrcFieldMap(
+ schema: StructType,
+ caseSensitive: Boolean): Map[String, DataType] = {
+ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper
+
+ def getPrimitiveFields(
+ fields: Seq[StructField],
+ parentFieldNames: Seq[String] = Seq.empty): Seq[(String, DataType)] = {
+ fields.flatMap { f =>
+ f.dataType match {
+ case st: StructType =>
+ getPrimitiveFields(st.fields, parentFieldNames :+ f.name)
+ case BinaryType => None
+ case _: AtomicType =>
+ Some(((parentFieldNames :+ f.name).quoted, f.dataType))
+ case _ => None
+ }
+ }
+ }
+
+ val primitiveFields = getPrimitiveFields(schema.fields)
+ if (caseSensitive) {
+ primitiveFields.toMap
+ } else {
+ // Don't consider ambiguity here, i.e. more than one field is matched in case insensitive
+ // mode, just skip pushdown for these fields, they will trigger Exception when reading,
+ // See: SPARK-25175.
+ val dedupPrimitiveFields =
+ primitiveFields
Review comment:
indentation?
```diff
- val dedupPrimitiveFields =
- primitiveFields
+ val dedupPrimitiveFields = primitiveFields
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]