xloya commented on code in PR #4019: URL: https://github.com/apache/gravitino/pull/4019#discussion_r1666370540
########## core/src/main/java/com/datastrato/gravitino/storage/relational/mapper/TagMetadataObjectRelMapper.java: ########## @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.datastrato.gravitino.storage.relational.mapper; + +import org.apache.ibatis.annotations.Delete; +import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.Update; + +public interface TagMetadataObjectRelMapper { + String TAG_METADATA_OBJECT_RELATION_TABLE_NAME = "tag_relation_meta"; + + @Update( + "UPDATE " + + TAG_METADATA_OBJECT_RELATION_TABLE_NAME + + " tmo SET tmo.deleted_at = (UNIX_TIMESTAMP() * 1000.0)" + + " + EXTRACT(MICROSECOND FROM CURRENT_TIMESTAMP(3)) / 1000" + + " WHERE tmo.tag_id IN (SELECT tm.tag_id FROM " + + TagMetaMapper.TAG_TABLE_NAME + + " tm WHERE tm.metalake_id IN (SELECT mm.metalake_id FROM " + + MetalakeMetaMapper.TABLE_NAME + + " mm WHERE mm.metalake_name = #{metalakeName} AND mm.deleted_at = 0)" + + " AND tm.deleted_at = 0) AND tmo.deleted_at = 0") + Integer softDeleteTagMetadataObjectRelsByMetalakeAndTagName( + @Param("metalakeName") String metalakeName, @Param("tagName") String tagName); + + @Update( + "UPDATE " + + TAG_METADATA_OBJECT_RELATION_TABLE_NAME + + " tmo SET tmo.deleted_at = (UNIX_TIMESTAMP() * 1000.0)" + + " + EXTRACT(MICROSECOND FROM CURRENT_TIMESTAMP(3)) / 1000" + + " WHERE EXISTS ( SELECT * FROM " Review Comment: Unnecessary space before `SELECT`. ########## core/src/main/java/com/datastrato/gravitino/storage/relational/service/TagMetaService.java: ########## @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.datastrato.gravitino.storage.relational.service; + +import com.datastrato.gravitino.Entity; +import com.datastrato.gravitino.HasIdentifier; +import com.datastrato.gravitino.NameIdentifier; +import com.datastrato.gravitino.Namespace; +import com.datastrato.gravitino.exceptions.NoSuchEntityException; +import com.datastrato.gravitino.meta.TagEntity; +import com.datastrato.gravitino.storage.relational.mapper.TagMetaMapper; +import com.datastrato.gravitino.storage.relational.mapper.TagMetadataObjectRelMapper; +import com.datastrato.gravitino.storage.relational.po.TagPO; +import com.datastrato.gravitino.storage.relational.utils.ExceptionUtils; +import com.datastrato.gravitino.storage.relational.utils.POConverters; +import com.datastrato.gravitino.storage.relational.utils.SessionUtils; +import com.google.common.base.Preconditions; +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; +import java.util.stream.Collectors; + +public class TagMetaService { + + private static final TagMetaService INSTANCE = new TagMetaService(); + + public static TagMetaService getInstance() { + return INSTANCE; + } + + private TagMetaService() {} + + public List<TagEntity> listTagsByNamespace(Namespace ns) { + String metalakeName = ns.level(0); + List<TagPO> tagPOs = + SessionUtils.getWithoutCommit( + TagMetaMapper.class, mapper -> mapper.listTagPOsByMetalake(metalakeName)); + return tagPOs.stream() + .map(tagPO -> POConverters.fromTagPO(tagPO, ns)) + .collect(Collectors.toList()); + } + + public TagEntity getTagByIdentifier(NameIdentifier ident) { + String metalakeName = ident.namespace().level(0); + TagPO tagPO = getTagPOByMetalakeAndName(metalakeName, ident.name()); + return POConverters.fromTagPO(tagPO, ident.namespace()); + } + + public void insertTag(TagEntity tagEntity, boolean overwritten) throws IOException { + Namespace ns = tagEntity.namespace(); + String metalakeName = ns.level(0); + AtomicReference<Long> metalakeId = new AtomicReference<>(null); + + try { + SessionUtils.doMultipleWithCommit( + () -> metalakeId.set(MetalakeMetaService.getInstance().getMetalakeIdByName(metalakeName)), Review Comment: According to the current design, I think we'd better add locks in the upper layer to avoid duplication of internal locks and database locks. ########## scripts/mysql/schema-0.6.0-mysql.sql: ########## @@ -208,4 +208,33 @@ CREATE TABLE IF NOT EXISTS `group_role_rel` ( PRIMARY KEY (`id`), UNIQUE KEY `uk_gi_ri_del` (`group_id`, `role_id`, `deleted_at`), KEY `idx_rid` (`group_id`) - ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'group role relation'; \ No newline at end of file + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'group role relation'; + +CREATE TABLE IF NOT EXISTS `tag_meta` ( + `tag_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'tag id', + `tag_name` VARCHAR(128) NOT NULL COMMENT 'tag name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `tag_comment` VARCHAR(256) DEFAULT '' COMMENT 'tag comment', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'tag properties', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'tag audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'tag deleted at', + PRIMARY KEY (`tag_id`), + UNIQUE KEY `uk_mi_tn_del` (`metalake_id`, `tag_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'tag metadata'; + +CREATE TABLE IF NOT EXISTS `tag_relation_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `tag_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'tag id', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'tag relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'tag relation deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_ti_mi_del` (`tag_id`, `metadata_object_id`, `deleted_at`), + KEY `idx_tid` (`tag_id`), + KEY `idx_mid` (`metadata_object_id`) Review Comment: I don't see any query using this condition in the current SQL, so it seems that this index is unnecessary now. ########## core/src/main/java/com/datastrato/gravitino/storage/relational/mapper/TagMetadataObjectRelMapper.java: ########## @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.datastrato.gravitino.storage.relational.mapper; + +import org.apache.ibatis.annotations.Delete; +import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.Update; + +public interface TagMetadataObjectRelMapper { + String TAG_METADATA_OBJECT_RELATION_TABLE_NAME = "tag_relation_meta"; + + @Update( + "UPDATE " + + TAG_METADATA_OBJECT_RELATION_TABLE_NAME + + " tmo SET tmo.deleted_at = (UNIX_TIMESTAMP() * 1000.0)" + + " + EXTRACT(MICROSECOND FROM CURRENT_TIMESTAMP(3)) / 1000" + + " WHERE tmo.tag_id IN (SELECT tm.tag_id FROM " + + TagMetaMapper.TAG_TABLE_NAME + + " tm WHERE tm.metalake_id IN (SELECT mm.metalake_id FROM " Review Comment: How about using `JOIN` in the sub queries here? As far as I know, the performance of the `IN` condition will be poor when the amount of data is large, so we'd better try to reduce the use of `IN`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
