GitHub user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2396#discussion_r197502251 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala --- @@ -67,14 +68,68 @@ case class CarbonDatasourceHadoopRelation( override def schema: StructType = tableSchema.getOrElse(carbonRelation.schema) def buildScan(requiredColumns: Array[String], + projects: Seq[NamedExpression], filters: Array[Filter], partitions: Seq[PartitionSpec]): RDD[InternalRow] = { val filterExpression: Option[Expression] = filters.flatMap { filter => CarbonFilters.createCarbonFilter(schema, filter) }.reduceOption(new AndExpression(_, _)) + var parentColumns = new ListBuffer[String] + // In case of Struct or StructofStruct Complex type, get the project column for given + // parent/child field and pushdown the corresponding project column. In case of Array, + // ArrayofStruct or StructofArray, pushdown parent column + var reqColumns = projects.map { + case a@Alias(s: GetStructField, name) => + val arrayTypeExists = s.childSchema.map(x => x.dataType) + .filter(dataType => dataType.isInstanceOf[ArrayType]) + if (0 == arrayTypeExists.length) { + val columnName = s.toString().replaceAll("#[0-9]*", "") + parentColumns += columnName.split("\\.")(0) + columnName + } + else { + None + } + case a@Alias(s: GetArrayItem, name) => + None + case other => other.name.replaceAll("#[0-9]*", "") + } + + var reqCols = reqColumns.filterNot(none => none.equals(None)).map(col => col.toString) + parentColumns = parentColumns.distinct + reqCols = reqCols.distinct + + // if the parent column is there in the projection list then we can filter out all the children + // in that projection list + val parentColumnOnProjectionList = reqCols.filter(col => parentColumns.contains(col)) --- End diff -- Remove the merge logic here.
---