Chunhui Shi created DRILL-5328:
----------------------------------

             Summary: Trim down physical plan size - replace 
StoragePluginConfig with storage name
                 Key: DRILL-5328
                 URL: https://issues.apache.org/jira/browse/DRILL-5328
             Project: Apache Drill
          Issue Type: Improvement
            Reporter: Chunhui Shi


For a physical plan, we currently pass the StoragePluginConfig as part of the 
plan, and the destination then uses that config to fetch the storage plugin from 
StoragePluginRegistry. However, we could instead fetch the storage plugin by its 
name, which is identical across all Drillbits. 

In the example below of a simple physical plan of 150 lines, the storage 
plugin config takes up 60 lines. In a typical large system, the FileSystem 
StoragePluginConfig could be >500 lines. So this improvement should avoid the 
cost of passing an unnecessarily large physical plan among nodes.

0: jdbc:drill:zk=10.10.88.126:5181> explain plan for select * from 
dfs.tmp.employee1 where last_name='Blumberg';
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(*=[$0])
00-02        Project(T1¦¦*=[$0])
00-03          SelectionVectorRemover
00-04            Filter(condition=[=($1, 'Blumberg')])
00-05              Project(T1¦¦*=[$0], last_name=[$1])
00-06                Scan(groupscan=[ParquetGroupScan 
[entries=[ReadEntryWithPath [path=/tmp/employee1/0_0_0.parquet]], 
selectionRoot=/tmp/employee1, numFiles=1, usedMetadataFile=true, 
cacheFileRoot=/tmp/employee1, columns=[`*`]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "parquet-scan",
    "@id" : 6,
    "userName" : "root",
    "entries" : [ {
      "path" : "/tmp/employee1/0_0_0.parquet"
    } ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "config" : null,
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "defaultInputFormat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "shi" : {
          "location" : "/user/shi",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "dir700" : {
          "location" : "/user/shi/dir700",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "dir775" : {
          "location" : "/user/shi/dir775",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "xyz" : {
          "location" : "/user/xyz",
          "writable" : true,
          "defaultInputFormat" : null
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json",
          "extensions" : [ "json" ]
        },
        "maprdb" : {
          "type" : "maprdb"
        }
      }
    },
    "format" : {
      "type" : "parquet"
    },
    "columns" : [ "`*`" ],
    "selectionRoot" : "/tmp/employee1",
    "filter" : "true",
    "fileSet" : [ "/tmp/employee1/0_0_0.parquet" ],
    "files" : [ "/tmp/employee1/0_0_0.parquet" ],
    "cost" : 1155.0
  }, {
    "pop" : "project",
    "@id" : 5,
    "exprs" : [ {
      "ref" : "`T1¦¦*`",
      "expr" : "`*`"
    }, {
      "ref" : "`last_name`",
      "expr" : "`last_name`"
    } ],
    "child" : 6,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1155.0
  }, {
    "pop" : "filter",
    "@id" : 4,
    "child" : 5,
    "expr" : "equal(`last_name`, 'Blumberg') ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 3,
    "child" : 4,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  }, {
    "pop" : "project",
    "@id" : 2,
    "exprs" : [ {
      "ref" : "`T1¦¦*`",
      "expr" : "`T1¦¦*`"
    } ],
    "child" : 3,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  }, {
    "pop" : "project",
    "@id" : 1,
    "exprs" : [ {
      "ref" : "`*`",
      "expr" : "`T1¦¦*`"
    } ],
    "child" : 2,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  } ]
} |




--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to