Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197502251
--- Diff:
integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
---
@@ -67,14 +68,68 @@ case class CarbonDatasourceHadoopRelation(
override def schema: StructType =
tableSchema.getOrElse(carbonRelation.schema)
def buildScan(requiredColumns: Array[String],
+ projects: Seq[NamedExpression],
filters: Array[Filter],
partitions: Seq[PartitionSpec]): RDD[InternalRow] = {
val filterExpression: Option[Expression] = filters.flatMap { filter =>
CarbonFilters.createCarbonFilter(schema, filter)
}.reduceOption(new AndExpression(_, _))
+ var parentColumns = new ListBuffer[String]
+ // In case of Struct or StructofStruct Complex type, get the project column for given
+ // parent/child field and pushdown the corresponding project column. In case of Array,
+ // ArrayofStruct or StructofArray, pushdown parent column
+ var reqColumns = projects.map {
+ case a@Alias(s: GetStructField, name) =>
+ val arrayTypeExists = s.childSchema.map(x => x.dataType)
+ .filter(dataType => dataType.isInstanceOf[ArrayType])
+ if (0 == arrayTypeExists.length) {
+ val columnName = s.toString().replaceAll("#[0-9]*", "")
+ parentColumns += columnName.split("\\.")(0)
+ columnName
+ }
+ else {
+ None
+ }
+ case a@Alias(s: GetArrayItem, name) =>
+ None
+ case other => other.name.replaceAll("#[0-9]*", "")
+ }
+
+ var reqCols = reqColumns.filterNot(none => none.equals(None)).map(col
=> col.toString)
+ parentColumns = parentColumns.distinct
+ reqCols = reqCols.distinct
+
+ // if the parent column is there in the projection list then we can filter out all the children
+ // in that projection list
+ val parentColumnOnProjectionList = reqCols.filter(col =>
parentColumns.contains(col))
--- End diff --
Remove merge logic here
---