arina-ielchiieva commented on a change in pull request #2060: URL: https://github.com/apache/drill/pull/2060#discussion_r410895858
########## File path: metastore/rdbms-metastore/src/main/java/org/apache/drill/metastore/rdbms/components/tables/TablesMetadataMapper.java ########## @@ -0,0 +1,606 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.metastore.rdbms.components.tables; + +import org.apache.drill.metastore.MetastoreColumn; +import org.apache.drill.metastore.components.tables.TableMetadataUnit; +import org.apache.drill.metastore.rdbms.transform.AbstractMetadataMapper; +import org.apache.drill.metastore.rdbms.transform.RdbmsFilterExpressionVisitor; +import org.apache.drill.metastore.rdbms.util.ConverterUtil; +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap; +import org.jooq.Condition; +import org.jooq.Field; +import org.jooq.Record; +import org.jooq.Table; +import org.jooq.generated.Tables; +import org.jooq.generated.tables.records.FilesRecord; +import org.jooq.generated.tables.records.PartitionsRecord; +import org.jooq.generated.tables.records.RowGroupsRecord; +import org.jooq.generated.tables.records.SegmentsRecord; +import org.jooq.generated.tables.records.TablesRecord; +import org.jooq.impl.DSL; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import 
java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Abstract implementation of {@link AbstractMetadataMapper} for RDBMS Metastore tables component. + * Contains common code for specific RDBMS Metastore tables component tables. + * + * @param <R> RDBMS table record type + */ +public abstract class TablesMetadataMapper<R extends Record> extends AbstractMetadataMapper<TableMetadataUnit, R> { + + protected static final Function<TableMetadataUnit, List<String>> TABLE_PARTITION_KEY = unit -> + Arrays.asList(unit.storagePlugin(), unit.workspace(), unit.tableName()); + + protected static final Function<TableMetadataUnit, List<String>> COMPONENT_PARTITION_KEY = unit -> + Arrays.asList(unit.storagePlugin(), unit.workspace(), unit.tableName(), unit.metadataKey()); + + @Override + public TableMetadataUnit emptyUnit() { + return TableMetadataUnit.EMPTY_UNIT; + } + + @Override + public List<Condition> toDeleteConditions(List<TableMetadataUnit> units) { + Set<List<String>> partitionValues = units.stream() + .collect(Collectors.groupingBy(partitionKey(), Collectors.toList())) + .keySet(); + + return partitionValues.stream() + .map(values -> DSL.and(toConditions(values))) + .collect(Collectors.toList()); + } + + /** + * @return function to determine partition key for specific table + */ + protected abstract Function<TableMetadataUnit, List<String>> partitionKey(); + + /** + * Creates JOOQ conditions based on given list of partition values. + * Matching is order based. + * + * @param values partition values + * @return list of JOOQ conditions + */ + protected abstract List<Condition> toConditions(List<String> values); + + /** + * {@link TablesMetadataMapper} implementation for {@link Tables#TABLES} table. 
+ */ + public static class TableMapper extends TablesMetadataMapper<TablesRecord> { + + private static final TableMapper INSTANCE = new TableMapper(); + + private static final Map<MetastoreColumn, Field<?>> COLUMNS_MAP = ImmutableMap.<MetastoreColumn, Field<?>>builder() + .put(MetastoreColumn.STORAGE_PLUGIN, Tables.TABLES.STORAGE_PLUGIN) + .put(MetastoreColumn.WORKSPACE, Tables.TABLES.WORKSPACE) + .put(MetastoreColumn.TABLE_NAME, Tables.TABLES.TABLE_NAME) + .put(MetastoreColumn.OWNER, Tables.TABLES.OWNER) + .put(MetastoreColumn.TABLE_TYPE, Tables.TABLES.TABLE_TYPE) + .put(MetastoreColumn.METADATA_KEY, Tables.TABLES.METADATA_KEY) + .put(MetastoreColumn.METADATA_TYPE, Tables.TABLES.METADATA_TYPE) + .put(MetastoreColumn.LOCATION, Tables.TABLES.LOCATION) + .put(MetastoreColumn.INTERESTING_COLUMNS, Tables.TABLES.INTERESTING_COLUMNS) + .put(MetastoreColumn.SCHEMA, Tables.TABLES.SCHEMA) + .put(MetastoreColumn.COLUMNS_STATISTICS, Tables.TABLES.COLUMN_STATISTICS) + .put(MetastoreColumn.METADATA_STATISTICS, Tables.TABLES.METADATA_STATISTICS) + .put(MetastoreColumn.PARTITION_KEYS, Tables.TABLES.PARTITION_KEYS) + .put(MetastoreColumn.LAST_MODIFIED_TIME, Tables.TABLES.LAST_MODIFIED_TIME) + .put(MetastoreColumn.ADDITIONAL_METADATA, Tables.TABLES.ADDITIONAL_METADATA) + .build(); Review comment: Yes, our RDBMS Metastore is more like blob storage for large fields and regular storage for the fields by which we filter the data. Denormalization here was chosen deliberately, taking into account the semantics of the Metastore API implementation and the way data is stored, deleted and accessed. A normalized structure would be an overhead since we mostly extract all data for a table; likewise, when we update data we don't update specific fields, we just erase all data and re-write it. If we used a normalized structure we would end up like the HMS Metastore, with many tables and relations, but since we always need all data, we would have to do a lot of joins all the time. 
Since many queries to the Metastore are generated at runtime, it would be a challenge to find a framework that would handle this nicely; maybe Hibernate, but it still wouldn't cover all cases. The same would apply to data updates or deletion: first delete all data about the table, then insert it and update the PK / FK relations. This would impact performance significantly. It's worth mentioning that the Metastore API is generic to any type of storage: file based, RDBMS based or NoSQL based. So, to make sure everything works across all of them, we sometimes have to sacrifice something; in our case, normalization. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
