Chunhui Shi created DRILL-5328: ---------------------------------- Summary: Trim down physical plan size - replace StoragePluginConfig with storage name Key: DRILL-5328 URL: https://issues.apache.org/jira/browse/DRILL-5328 Project: Apache Drill Issue Type: Improvement Reporter: Chunhui Shi
For a physical plan, we currently pass the StoragePluginConfig as part of the plan, and the destination then uses the config to fetch the storage plugin from the StoragePluginRegistry. However, we can also fetch a storage plugin by its name, which is identical on all Drillbits. In the example below, a simple physical plan of 150 lines, the storage plugin config takes 60 lines. In a typical large system, the FileSystem's StoragePluginConfig could be >500 lines, so this improvement should save the cost of passing a larger physical plan among nodes. 0: jdbc:drill:zk=10.10.88.126:5181> explain plan for select * from dfs.tmp.employee1 where last_name='Blumberg'; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(*=[$0]) 00-02 Project(T1¦¦*=[$0]) 00-03 SelectionVectorRemover 00-04 Filter(condition=[=($1, 'Blumberg')]) 00-05 Project(T1¦¦*=[$0], last_name=[$1]) 00-06 Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=/tmp/employee1/0_0_0.parquet]], selectionRoot=/tmp/employee1, numFiles=1, usedMetadataFile=true, cacheFileRoot=/tmp/employee1, columns=[`*`]]]) | { "head" : { "version" : 1, "generator" : { "type" : "ExplainHandler", "info" : "" }, "type" : "APACHE_DRILL_PHYSICAL", "options" : [ ], "queue" : 0, "resultMode" : "EXEC" }, "graph" : [ { "pop" : "parquet-scan", "@id" : 6, "userName" : "root", "entries" : [ { "path" : "/tmp/employee1/0_0_0.parquet" } ], "storage" : { "type" : "file", "enabled" : true, "connection" : "maprfs:///", "config" : null, "workspaces" : { "root" : { "location" : "/", "writable" : false, "defaultInputFormat" : null }, "tmp" : { "location" : "/tmp", "writable" : true, "defaultInputFormat" : null }, "shi" : { "location" : "/user/shi", "writable" : true, "defaultInputFormat" : null }, "dir700" : { "location" : "/user/shi/dir700", "writable" : true, "defaultInputFormat" : null }, "dir775" : { "location" : "/user/shi/dir775", "writable" : true, "defaultInputFormat" : null }, "xyz" : { "location" : "/user/xyz", "writable" : true, 
"defaultInputFormat" : null } }, "formats" : { "psv" : { "type" : "text", "extensions" : [ "tbl" ], "delimiter" : "|" }, "csv" : { "type" : "text", "extensions" : [ "csv" ], "delimiter" : "," }, "tsv" : { "type" : "text", "extensions" : [ "tsv" ], "delimiter" : "\t" }, "parquet" : { "type" : "parquet" }, "json" : { "type" : "json", "extensions" : [ "json" ] }, "maprdb" : { "type" : "maprdb" } } }, "format" : { "type" : "parquet" }, "columns" : [ "`*`" ], "selectionRoot" : "/tmp/employee1", "filter" : "true", "fileSet" : [ "/tmp/employee1/0_0_0.parquet" ], "files" : [ "/tmp/employee1/0_0_0.parquet" ], "cost" : 1155.0 }, { "pop" : "project", "@id" : 5, "exprs" : [ { "ref" : "`T1¦¦*`", "expr" : "`*`" }, { "ref" : "`last_name`", "expr" : "`last_name`" } ], "child" : 6, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 1155.0 }, { "pop" : "filter", "@id" : 4, "child" : 5, "expr" : "equal(`last_name`, 'Blumberg') ", "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 173.25 }, { "pop" : "selection-vector-remover", "@id" : 3, "child" : 4, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 173.25 }, { "pop" : "project", "@id" : 2, "exprs" : [ { "ref" : "`T1¦¦*`", "expr" : "`T1¦¦*`" } ], "child" : 3, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 173.25 }, { "pop" : "project", "@id" : 1, "exprs" : [ { "ref" : "`*`", "expr" : "`T1¦¦*`" } ], "child" : 2, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 173.25 }, { "pop" : "screen", "@id" : 0, "child" : 1, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 173.25 } ] } | -- This message was sent by Atlassian JIRA (v6.3.15#6346)