arina-ielchiieva commented on a change in pull request #2060:
URL: https://github.com/apache/drill/pull/2060#discussion_r410895858



##########
File path: 
metastore/rdbms-metastore/src/main/java/org/apache/drill/metastore/rdbms/components/tables/TablesMetadataMapper.java
##########
@@ -0,0 +1,606 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.metastore.rdbms.components.tables;
+
+import org.apache.drill.metastore.MetastoreColumn;
+import org.apache.drill.metastore.components.tables.TableMetadataUnit;
+import org.apache.drill.metastore.rdbms.transform.AbstractMetadataMapper;
+import org.apache.drill.metastore.rdbms.transform.RdbmsFilterExpressionVisitor;
+import org.apache.drill.metastore.rdbms.util.ConverterUtil;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
+import org.jooq.Condition;
+import org.jooq.Field;
+import org.jooq.Record;
+import org.jooq.Table;
+import org.jooq.generated.Tables;
+import org.jooq.generated.tables.records.FilesRecord;
+import org.jooq.generated.tables.records.PartitionsRecord;
+import org.jooq.generated.tables.records.RowGroupsRecord;
+import org.jooq.generated.tables.records.SegmentsRecord;
+import org.jooq.generated.tables.records.TablesRecord;
+import org.jooq.impl.DSL;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * Abstract implementation of {@link AbstractMetadataMapper} for RDBMS 
Metastore tables component.
+ * Contains common code for specific RDBMS Metastore tables component tables.
+ *
+ * @param <R> RDBMS table record type
+ */
+public abstract class TablesMetadataMapper<R extends Record> extends 
AbstractMetadataMapper<TableMetadataUnit, R> {
+
+  protected static final Function<TableMetadataUnit, List<String>> 
TABLE_PARTITION_KEY = unit ->
+    Arrays.asList(unit.storagePlugin(), unit.workspace(), unit.tableName());
+
+  protected static final Function<TableMetadataUnit, List<String>> 
COMPONENT_PARTITION_KEY = unit ->
+    Arrays.asList(unit.storagePlugin(), unit.workspace(), unit.tableName(), 
unit.metadataKey());
+
+  @Override
+  public TableMetadataUnit emptyUnit() {
+    return TableMetadataUnit.EMPTY_UNIT;
+  }
+
+  @Override
+  public List<Condition> toDeleteConditions(List<TableMetadataUnit> units) {
+    Set<List<String>> partitionValues = units.stream()
+      .collect(Collectors.groupingBy(partitionKey(), Collectors.toList()))
+      .keySet();
+
+    return partitionValues.stream()
+      .map(values -> DSL.and(toConditions(values)))
+      .collect(Collectors.toList());
+  }
+
+  /**
+   * @return function to determine partition key for specific table
+   */
+  protected abstract Function<TableMetadataUnit, List<String>> partitionKey();
+
+  /**
+   * Creates JOOQ conditions based on given list of partition values.
+   * Matching is order based.
+   *
+   * @param values partition values
+   * @return list of JOOQ conditions
+   */
+  protected abstract List<Condition> toConditions(List<String> values);
+
+  /**
+   * {@link TablesMetadataMapper} implementation for {@link Tables#TABLES} 
table.
+   */
+  public static class TableMapper extends TablesMetadataMapper<TablesRecord> {
+
+    private static final TableMapper INSTANCE = new TableMapper();
+
+    private static final Map<MetastoreColumn, Field<?>> COLUMNS_MAP = 
ImmutableMap.<MetastoreColumn, Field<?>>builder()
+      .put(MetastoreColumn.STORAGE_PLUGIN, Tables.TABLES.STORAGE_PLUGIN)
+      .put(MetastoreColumn.WORKSPACE, Tables.TABLES.WORKSPACE)
+      .put(MetastoreColumn.TABLE_NAME, Tables.TABLES.TABLE_NAME)
+      .put(MetastoreColumn.OWNER, Tables.TABLES.OWNER)
+      .put(MetastoreColumn.TABLE_TYPE, Tables.TABLES.TABLE_TYPE)
+      .put(MetastoreColumn.METADATA_KEY, Tables.TABLES.METADATA_KEY)
+      .put(MetastoreColumn.METADATA_TYPE, Tables.TABLES.METADATA_TYPE)
+      .put(MetastoreColumn.LOCATION, Tables.TABLES.LOCATION)
+      .put(MetastoreColumn.INTERESTING_COLUMNS, 
Tables.TABLES.INTERESTING_COLUMNS)
+      .put(MetastoreColumn.SCHEMA, Tables.TABLES.SCHEMA)
+      .put(MetastoreColumn.COLUMNS_STATISTICS, Tables.TABLES.COLUMN_STATISTICS)
+      .put(MetastoreColumn.METADATA_STATISTICS, 
Tables.TABLES.METADATA_STATISTICS)
+      .put(MetastoreColumn.PARTITION_KEYS, Tables.TABLES.PARTITION_KEYS)
+      .put(MetastoreColumn.LAST_MODIFIED_TIME, 
Tables.TABLES.LAST_MODIFIED_TIME)
+      .put(MetastoreColumn.ADDITIONAL_METADATA, 
Tables.TABLES.ADDITIONAL_METADATA)
+      .build();

Review comment:
       Yes, our RDBMS Metastore is more like blob storage for large fields and 
regular storage for the fields by which we filter the data. Denormalization 
here was chosen deliberately, taking into account the semantics of the 
Metastore API implementation and the way data is stored, deleted and accessed. 
Having a normalized structure would be an overhead, since we mostly extract all 
data for a table. Likewise, when we update data we don't update specific 
fields; we just erase all the data and re-write it.
   If we used a normalized structure we would end up like the Hive Metastore 
(HMS), with many tables and relations, but since we always need all the data, 
we would have to do a lot of joins all the time. Since many queries to the 
Metastore are generated at runtime, it would be a challenge to find a framework 
that would handle this nicely; maybe Hibernate, but even it would not cover all 
cases. 
   
   The same would apply to data updates or deletions: first we would have to 
delete all data about the table, then insert it again and update the PK / FK 
relations. This would impact performance significantly.
   
   It's worth mentioning that the Metastore API is generic across any type of 
storage: file-based, RDBMS-based or NoSQL-based. Because of this, to make sure 
everything works we sometimes have to sacrifice something; in our case, 
normalization.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to