Chunhui Shi created DRILL-5328:
----------------------------------
Summary: Trim down physical plan size - replace
StoragePluginConfig with storage name
Key: DRILL-5328
URL: https://issues.apache.org/jira/browse/DRILL-5328
Project: Apache Drill
Issue Type: Improvement
Reporter: Chunhui Shi
For a physical plan, we currently pass the StoragePluginConfig as part of the
plan, and the destination then uses that config to fetch the storage plugin
from the StoragePluginRegistry. However, we can also fetch a storage plugin by
its name, which is identical across all Drillbits.
In the simple 150-line physical plan shown below as an example, the storage
plugin config takes up 60 lines. In a typical large system, the FileSystem
StoragePluginConfig could be >500 lines. This improvement should therefore
reduce the cost of passing a large physical plan among nodes.
0: jdbc:drill:zk=10.10.88.126:5181> explain plan for select * from
dfs.tmp.employee1 where last_name='Blumberg';
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(*=[$0])
00-02 Project(T1¦¦*=[$0])
00-03 SelectionVectorRemover
00-04 Filter(condition=[=($1, 'Blumberg')])
00-05 Project(T1¦¦*=[$0], last_name=[$1])
00-06 Scan(groupscan=[ParquetGroupScan
[entries=[ReadEntryWithPath [path=/tmp/employee1/0_0_0.parquet]],
selectionRoot=/tmp/employee1, numFiles=1, usedMetadataFile=true,
cacheFileRoot=/tmp/employee1, columns=[`*`]]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "parquet-scan",
"@id" : 6,
"userName" : "root",
"entries" : [ {
"path" : "/tmp/employee1/0_0_0.parquet"
} ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "maprfs:///",
"config" : null,
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"defaultInputFormat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"defaultInputFormat" : null
},
"shi" : {
"location" : "/user/shi",
"writable" : true,
"defaultInputFormat" : null
},
"dir700" : {
"location" : "/user/shi/dir700",
"writable" : true,
"defaultInputFormat" : null
},
"dir775" : {
"location" : "/user/shi/dir775",
"writable" : true,
"defaultInputFormat" : null
},
"xyz" : {
"location" : "/user/xyz",
"writable" : true,
"defaultInputFormat" : null
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json",
"extensions" : [ "json" ]
},
"maprdb" : {
"type" : "maprdb"
}
}
},
"format" : {
"type" : "parquet"
},
"columns" : [ "`*`" ],
"selectionRoot" : "/tmp/employee1",
"filter" : "true",
"fileSet" : [ "/tmp/employee1/0_0_0.parquet" ],
"files" : [ "/tmp/employee1/0_0_0.parquet" ],
"cost" : 1155.0
}, {
"pop" : "project",
"@id" : 5,
"exprs" : [ {
"ref" : "`T1¦¦*`",
"expr" : "`*`"
}, {
"ref" : "`last_name`",
"expr" : "`last_name`"
} ],
"child" : 6,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1155.0
}, {
"pop" : "filter",
"@id" : 4,
"child" : 5,
"expr" : "equal(`last_name`, 'Blumberg') ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 173.25
}, {
"pop" : "selection-vector-remover",
"@id" : 3,
"child" : 4,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 173.25
}, {
"pop" : "project",
"@id" : 2,
"exprs" : [ {
"ref" : "`T1¦¦*`",
"expr" : "`T1¦¦*`"
} ],
"child" : 3,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 173.25
}, {
"pop" : "project",
"@id" : 1,
"exprs" : [ {
"ref" : "`*`",
"expr" : "`T1¦¦*`"
} ],
"child" : 2,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 173.25
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 173.25
} ]
} |
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)