This is an automated email from the ASF dual-hosted git repository. dzamo pushed a commit to branch gh-pages in repository https://gitbox.apache.org/repos/asf/drill.git
commit bdc00b63923507ec217880ce75ca795f6be4ed29 Author: James Turton <[email protected]> AuthorDate: Mon Feb 15 13:05:35 2021 +0200 Add 030-rdbms-metastore.md --- _data/docs.json | 120 ++++++++++++--- ...metastore.md => 020-drill-iceberg-metastore.md} | 0 .../drill-metastore/030-rdbms-metastore.md | 161 +++++++++++++++++++++ _sass/_site-main.scss | 1 + 4 files changed, 264 insertions(+), 18 deletions(-) diff --git a/_data/docs.json b/_data/docs.json index f764bcf..a7a7cab 100644 --- a/_data/docs.json +++ b/_data/docs.json @@ -4708,12 +4708,12 @@ } ], "children": [], - "next_title": "Performance Tuning Introduction", - "next_url": "/docs/performance-tuning-introduction/", + "next_title": "RDBMS Metastore", + "next_url": "/docs/rdbms-metastore/", "parent": "Drill Metastore", "previous_title": "Using Drill Metastore", "previous_url": "/docs/using-drill-metastore/", - "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md", + "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md", "title": "Drill Iceberg Metastore", "url": "/docs/drill-iceberg-metastore/" }, @@ -4775,14 +4775,35 @@ } ], "children": [], - "next_title": "Performance Tuning Introduction", - "next_url": "/docs/performance-tuning-introduction/", + "next_title": "RDBMS Metastore", + "next_url": "/docs/rdbms-metastore/", "parent": "Drill Metastore", "previous_title": "Using Drill Metastore", "previous_url": "/docs/using-drill-metastore/", - "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md", + "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md", "title": "Drill Iceberg Metastore", "url": "/docs/drill-iceberg-metastore/" + }, + { + "breadcrumbs": [ + { + "title": "Drill Metastore", + "url": "/docs/drill-metastore/" + }, + { + "title": "Performance Tuning", + "url": "/docs/performance-tuning/" + } + ], + "children": [], + "next_title": "Performance Tuning 
Introduction", + "next_url": "/docs/performance-tuning-introduction/", + "parent": "Drill Metastore", + "previous_title": "Drill Iceberg Metastore", + "previous_url": "/docs/drill-iceberg-metastore/", + "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md", + "title": "RDBMS Metastore", + "url": "/docs/rdbms-metastore/" } ], "next_title": "Using Drill Metastore", @@ -8419,14 +8440,35 @@ } ], "children": [], - "next_title": "Performance Tuning Introduction", - "next_url": "/docs/performance-tuning-introduction/", + "next_title": "RDBMS Metastore", + "next_url": "/docs/rdbms-metastore/", "parent": "Drill Metastore", "previous_title": "Using Drill Metastore", "previous_url": "/docs/using-drill-metastore/", - "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md", + "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md", "title": "Drill Iceberg Metastore", "url": "/docs/drill-iceberg-metastore/" + }, + { + "breadcrumbs": [ + { + "title": "Drill Metastore", + "url": "/docs/drill-metastore/" + }, + { + "title": "Performance Tuning", + "url": "/docs/performance-tuning/" + } + ], + "children": [], + "next_title": "Performance Tuning Introduction", + "next_url": "/docs/performance-tuning-introduction/", + "parent": "Drill Metastore", + "previous_title": "Drill Iceberg Metastore", + "previous_url": "/docs/drill-iceberg-metastore/", + "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md", + "title": "RDBMS Metastore", + "url": "/docs/rdbms-metastore/" } ], "next_title": "Using Drill Metastore", @@ -8449,8 +8491,8 @@ "next_title": "Partition Pruning", "next_url": "/docs/partition-pruning/", "parent": "Performance Tuning", - "previous_title": "Drill Iceberg Metastore", - "previous_url": "/docs/drill-iceberg-metastore/", + "previous_title": "RDBMS Metastore", + "previous_url": "/docs/rdbms-metastore/", "relative_path": 
"_docs/performance-tuning/010-performance-tuning-introduction.md", "title": "Performance Tuning Introduction", "url": "/docs/performance-tuning-introduction/" @@ -8944,8 +8986,8 @@ "next_title": "Partition Pruning", "next_url": "/docs/partition-pruning/", "parent": "Performance Tuning", - "previous_title": "Drill Iceberg Metastore", - "previous_url": "/docs/drill-iceberg-metastore/", + "previous_title": "RDBMS Metastore", + "previous_url": "/docs/rdbms-metastore/", "relative_path": "_docs/performance-tuning/010-performance-tuning-introduction.md", "title": "Performance Tuning Introduction", "url": "/docs/performance-tuning-introduction/" @@ -10827,6 +10869,27 @@ "title": "Querying the INFORMATION SCHEMA", "url": "/docs/querying-the-information-schema/" }, + "RDBMS Metastore": { + "breadcrumbs": [ + { + "title": "Drill Metastore", + "url": "/docs/drill-metastore/" + }, + { + "title": "Performance Tuning", + "url": "/docs/performance-tuning/" + } + ], + "children": [], + "next_title": "Performance Tuning Introduction", + "next_url": "/docs/performance-tuning-introduction/", + "parent": "Drill Metastore", + "previous_title": "Drill Iceberg Metastore", + "previous_url": "/docs/drill-iceberg-metastore/", + "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md", + "title": "RDBMS Metastore", + "url": "/docs/rdbms-metastore/" + }, "RDBMS Storage Plugin": { "breadcrumbs": [ { @@ -20035,14 +20098,35 @@ } ], "children": [], - "next_title": "Performance Tuning Introduction", - "next_url": "/docs/performance-tuning-introduction/", + "next_title": "RDBMS Metastore", + "next_url": "/docs/rdbms-metastore/", "parent": "Drill Metastore", "previous_title": "Using Drill Metastore", "previous_url": "/docs/using-drill-metastore/", - "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md", + "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md", "title": "Drill Iceberg Metastore", "url": 
"/docs/drill-iceberg-metastore/" + }, + { + "breadcrumbs": [ + { + "title": "Drill Metastore", + "url": "/docs/drill-metastore/" + }, + { + "title": "Performance Tuning", + "url": "/docs/performance-tuning/" + } + ], + "children": [], + "next_title": "Performance Tuning Introduction", + "next_url": "/docs/performance-tuning-introduction/", + "parent": "Drill Metastore", + "previous_title": "Drill Iceberg Metastore", + "previous_url": "/docs/drill-iceberg-metastore/", + "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md", + "title": "RDBMS Metastore", + "url": "/docs/rdbms-metastore/" } ], "next_title": "Using Drill Metastore", @@ -20065,8 +20149,8 @@ "next_title": "Partition Pruning", "next_url": "/docs/partition-pruning/", "parent": "Performance Tuning", - "previous_title": "Drill Iceberg Metastore", - "previous_url": "/docs/drill-iceberg-metastore/", + "previous_title": "RDBMS Metastore", + "previous_url": "/docs/rdbms-metastore/", "relative_path": "_docs/performance-tuning/010-performance-tuning-introduction.md", "title": "Performance Tuning Introduction", "url": "/docs/performance-tuning-introduction/" diff --git a/_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md b/_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md similarity index 100% rename from _docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md rename to _docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md diff --git a/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md b/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md new file mode 100644 index 0000000..e10ce68 --- /dev/null +++ b/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md @@ -0,0 +1,161 @@ +--- +title: "RDBMS Metastore" +slug: "RDBMS Metastore" +parent: "Drill Metastore" +--- + +The RDBMS Metastore implementation allows you store Drill Metastore metadata in a configured RDBMS. 
+ +## Configuration + +Currently, the RDBMS Metastore is not the default implementation. +To enable the RDBMS Metastore, create the `drill-metastore-override.conf` file +in your config directory and specify the RDBMS Metastore class: + +```yaml +drill.metastore: { + implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore" +} +``` + +### Connection properties + +Use the connection properties to specify how Drill should connect to your Metastore database. + +`drill.metastore.rdbms.data_source.driver` - driver class name. Required. +Note: the driver class must be included in the Drill classpath. +The easiest way to do that is to put the driver jar file into the `$DRILL_HOME/jars/3rdparty` folder. +Or, to make upgrades easier, in your `$DRILL_SITE/jars` folder. Drill includes the driver for SQLite. + +`drill.metastore.rdbms.data_source.url` - connection url. Required. + +`drill.metastore.rdbms.data_source.username` - database user on whose behalf the connection is +being made. Optional, if database does not require user to connect. + +`drill.metastore.rdbms.data_source.password` - database user's password. +Optional, if database does not require user's password to connect. + +`drill.metastore.rdbms.data_source.properties` - specifies properties which will be used +during data source creation. See list of available [Hikari properties](https://github.com/brettwooldridge/HikariCP) +for more details. + +### Default configuration + +Out of the box, the Drill RDBMS Metastore is configured to use the embedded file system based SQLite database. +It will be created locally in user's home directory under `${drill.exec.zk.root}/metastore` location. + +Default setup can be used only in Drill embedded mode. SQLite is an embedded database; it is not distributed. +SQLite is good for trying out the feature, for testing, for running Drill in embedded mode, +and perhaps for a single-node Drill "cluster". It should not be used in a multi-node cluster. 
+Each Drillbit will have its own version of the truth and behavior will be undefined and incorrect. + +### Custom configuration + +`drill-metastore-override.conf` is used to customize connection details to the Drill Metastore database. +See `drill-metastore-override-example.conf` for more details. + +#### Example of PostgreSQL configuration + +```yaml +drill.metastore: { + implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore", + rdbms: { + data_source: { + driver: "org.postgresql.Driver", + url: "jdbc:postgresql://localhost:1234/mydb?currentSchema=drill_metastore", + username: "user", + password: "password" + } + } +} +``` + +Note: as mentioned above, the PostgreSQL JDBC driver must be present in the Drill classpath. + +#### Example of MySQL configuration + +```yaml +drill.metastore: { + implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore", + rdbms: { + data_source: { + driver: "com.mysql.cj.jdbc.Driver", + url: "jdbc:mysql://localhost:1234/drill_metastore", + username: "user", + password: "password" + } + } +} +``` + +Note: as mentioned above, the MySQL JDBC driver must be present in the Drill classpath. + +{% include startnote.html %} +For MySQL connector version 6+, use the <code>com.mysql.cj.jdbc.Driver</code> driver class, +for older versions use the <code>com.mysql.jdbc.Driver</code>. +{% include endnote.html %} + +## Tables structure + +The Drill Metastore stores several types of metadata, called components. Currently, only the `tables` component is implemented. +The `tables` component provides metadata about Drill tables, including their segments, files, row groups and partitions. +In Drill, a `tables` component unit is represented by the `TableMetadataUnit` class, which is applicable to any metadata type. +The `TableMetadataUnit` class holds fields for all five metadata types within the `tables` component. +Any fields not applicable to a particular metadata type are simply ignored and remain unset. 
+ +In the RDBMS implementation of the Drill Metastore, the tables component includes five tables, one for each metadata type. +The five tables are: `TABLES`, `SEGMENTS`, `FILES`, `ROW_GROUPS`, and `PARTITIONS`. +See `src/main/resources/db/changelog/changes/initial_ddls.yaml` for the schema and indexes of each table. + +The Drill Metastore API has the following semantics: +* most of the time all data about a component is accessed; +* data is filtered by non-complex fields, like storage plugin, workspace, table name, etc; +* data is overwritten fully, there is no update by certain fields. + +Taking into account the Drill Metastore API semantics, the RDBMS Drill Metastore schema is slightly denormalized. +Having a normalized structure would lead to unnecessary joins during select and index re-indexing during update. + +### Table creation + +The RDBMS Metastore uses [Liquibase](https://www.liquibase.org/documentation/core-concepts/index.html) +to create the needed tables during the RDBMS Metastore initialization. Users should not create any tables manually. + +### Database schema + +Liquibase uses a yaml configuration file to apply changes to the database schema: `src/main/resources/db/changelog/changelog.yaml`. +Liquibase converts the yaml specification into the DDL / DML commands required for the configured database. +See list of supported databases: https://www.liquibase.org/databases.html. + +The Drill Metastore tables are created in the database schema indicated in the connection URL. +This will be the default schema unless you specify a different schema. Drill will not create the schema, however. +Best practice is to create a schema within your database for the Drill metastore before initializing the Metastore. 
+ +Example: + +PostgreSQL: `jdbc:postgresql://localhost:1234/mydb?currentSchema=drill_metastore` + +MySQL: `jdbc:mysql://localhost:1234/drill_metastore` + +Since Drill will create the required tables, ensure that the database user has the following permissions in the metastore schema: +* read and write tables; +* create and modify database objects (tables, indexes, views, etc.). + +### Liquibase tables + +During Drill RDBMS Metastore initialization, Liquibase will create two internal tracking tables: +`DATABASECHANGELOG` and `DATABASECHANGELOGLOCK`. They are needed to track schema changes and concurrent updates. +See https://www.liquibase.org/get_started/how-lb-works.html for more details. + +## Query execution + +SQL queries issued to RDBMS Metastore tables are generated using [JOOQ](https://www.jooq.org/doc/3.13/manual/getting-started/). +Drill uses the open-source version of JOOQ to generate the queries sent to the configured Metastore database. + +JOOQ generates SQL statements based on SQL dialect determined by database connection details. +List of supported dialects: https://www.jooq.org/javadoc/3.13.x/org.jooq/org/jooq/SQLDialect.html. +Note: dialects annotated with `@Pro` are not supported, since the open-source version of JOOQ is used. + +## Supported databases + +The RDBMS Metastore was tested with `SQLite`, `PostgreSQL` and `MySQL`. Other databases should also work +if there is Liquibase and JOOQ support for them. diff --git a/_sass/_site-main.scss b/_sass/_site-main.scss index 8374782..5ebdf39 100644 --- a/_sass/_site-main.scss +++ b/_sass/_site-main.scss @@ -147,6 +147,7 @@ a.anchor { #menu ul li.social-menu-item img { width: 22px; + padding-bottom: 10px; } #menu ul li ul {
