[
https://issues.apache.org/jira/browse/DRILL-6101?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16568460#comment-16568460
]
ASF GitHub Bot commented on DRILL-6101:
---------------------------------------
ilooner closed pull request #1414: DRILL-6101: Optimized implicit columns
handling within scanner
URL: https://github.com/apache/drill/pull/1414
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillScanRel.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillScanRel.java
index df80a10fd20..a64831b6d77 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillScanRel.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillScanRel.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.planner.logical;
+import java.util.ArrayList;
import java.io.IOException;
import java.util.List;
@@ -61,12 +62,12 @@ public DrillScanRel(final RelOptCluster cluster, final
RelTraitSet traits,
final RelOptTable table) {
this(cluster, traits, table, false);
}
- /** Creates a DrillScan. */
+ /** Creates a DrillScan. */
public DrillScanRel(final RelOptCluster cluster, final RelTraitSet traits,
- final RelOptTable table, boolean partitionFilterPushdown) {
+ final RelOptTable table, boolean
partitionFilterPushdown) {
// By default, scan does not support project pushdown.
// Decision whether push projects into scan will be made solely in
DrillPushProjIntoScanRule.
- this(cluster, traits, table, table.getRowType(), GroupScan.ALL_COLUMNS,
partitionFilterPushdown);
+ this(cluster, traits, table, table.getRowType(),
getProjectedColumns(table, true), partitionFilterPushdown);
this.settings = PrelUtil.getPlannerSettings(cluster.getPlanner());
}
@@ -78,7 +79,7 @@ public DrillScanRel(final RelOptCluster cluster, final
RelTraitSet traits,
/** Creates a DrillScan. */
public DrillScanRel(final RelOptCluster cluster, final RelTraitSet traits,
- final RelOptTable table, final RelDataType rowType, final
List<SchemaPath> columns, boolean partitionFilterPushdown) {
+ final RelOptTable table, final RelDataType rowType,
final List<SchemaPath> columns, boolean partitionFilterPushdown) {
super(DRILL_LOGICAL, cluster, traits, table);
this.settings = PrelUtil.getPlannerSettings(cluster.getPlanner());
this.rowType = rowType;
@@ -100,7 +101,7 @@ public DrillScanRel(final RelOptCluster cluster, final
RelTraitSet traits,
/** Creates a DrillScanRel for a particular GroupScan */
public DrillScanRel(final RelOptCluster cluster, final RelTraitSet traits,
- final RelOptTable table, final GroupScan groupScan, final RelDataType
rowType, final List<SchemaPath> columns, boolean partitionFilterPushdown) {
+ final RelOptTable table, final GroupScan groupScan,
final RelDataType rowType, final List<SchemaPath> columns, boolean
partitionFilterPushdown) {
super(DRILL_LOGICAL, cluster, traits, table);
this.rowType = rowType;
this.columns = columns;
@@ -194,4 +195,21 @@ public boolean partitionFilterPushdown() {
return this.partitionFilterPushdown;
}
+ private static List<SchemaPath> getProjectedColumns(final RelOptTable table,
boolean isSelectStar) {
+ List<String> columnNames = table.getRowType().getFieldNames();
+ List<SchemaPath> projectedColumns = new
ArrayList<SchemaPath>(columnNames.size());
+
+ for (String columnName : columnNames) {
+ projectedColumns.add(SchemaPath.getSimplePath(columnName));
+ }
+
+ // If the row-type doesn't contain the STAR keyword, then insert it
+ // as we are dealing with a SELECT_STAR query.
+ if (isSelectStar && !Utilities.isStarQuery(projectedColumns)) {
+ projectedColumns.add(SchemaPath.STAR_COLUMN);
+ }
+
+ return projectedColumns;
+ }
+
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
index 48dad7f71bd..e4357196747 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
@@ -255,13 +255,17 @@ public boolean containsImplicitColumns() {
* 1. table columns
* 2. partition columns
* 3. implicit file columns
+ * If it is a star query, then only includes implicit columns that were
+ * explicitly selected (e.g., SELECT *, FILENAME FROM ..)
*/
private void init() {
- if (isStarQuery) {
- selectedImplicitColumns.putAll(allImplicitColumns);
- } else {
- for (SchemaPath column : columns) {
- String path = column.getRootSegmentPath();
+ for (SchemaPath column : columns) {
+ final String path = column.getRootSegmentPath();
+ if (isStarQuery) {
+ if (allImplicitColumns.get(path) != null) {
+ selectedImplicitColumns.put(path, allImplicitColumns.get(path));
+ }
+ } else {
if (isPartitionColumn(partitionDesignator, path)) {
selectedPartitionColumns.add(Integer.parseInt(path.substring(partitionDesignator.length())));
} else if (allImplicitColumns.get(path) != null) {
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> Optimize Implicit Columns Processing
> ------------------------------------
>
> Key: DRILL-6101
> URL: https://issues.apache.org/jira/browse/DRILL-6101
> Project: Apache Drill
> Issue Type: Improvement
> Components: Execution - Relational Operators
> Affects Versions: 1.12.0
> Reporter: salim achouche
> Assignee: salim achouche
> Priority: Critical
> Labels: ready-to-commit
> Fix For: 1.15.0
>
>
> Problem Description -
> * Apache Drill allows users to specify columns even for SELECT STAR queries
> * From my discussion with [~paul-rogers], Apache Calcite has a limitation
> where the extra columns are not provided
> * The workaround has been to always include all implicit columns for SELECT
> STAR queries
> * Unfortunately, the current implementation is very inefficient as implicit
> column values get duplicated; this leads to substantial performance
> degradation when the number of rows is large
> Suggested Optimization -
> * The NullableVarChar vector should be enhanced to efficiently store
> duplicate values
> * This will not only address the current Calcite limitations (for SELECT
> STAR queries) but also optimize all queries with implicit columns
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)