ivandika3 commented on code in PR #10588: URL: https://github.com/apache/ozone/pull/10588#discussion_r3481799241
########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/MultipartPartScanUtil.java: ########## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om.request.s3.multipart; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.cache.CacheKey; +import org.apache.hadoop.hdds.utils.db.cache.CacheValue; +import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmMultipartPartInfo; +import org.apache.hadoop.ozone.om.helpers.OmMultipartPartKey; +import org.apache.hadoop.ozone.om.helpers.QuotaUtil; + +/** + * Cache-aware scanner for multipart parts table rows. + */ +public final class MultipartPartScanUtil { Review Comment: Nit: I'd suggest to move this to `OMMultipartUploadUtils` so we only need a single util class for MPU. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/MultipartPartScanUtil.java: ########## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om.request.s3.multipart; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.cache.CacheKey; +import org.apache.hadoop.hdds.utils.db.cache.CacheValue; +import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmMultipartPartInfo; +import org.apache.hadoop.ozone.om.helpers.OmMultipartPartKey; +import org.apache.hadoop.ozone.om.helpers.QuotaUtil; + +/** + * Cache-aware scanner for multipart parts table rows. + */ +public final class MultipartPartScanUtil { + + private MultipartPartScanUtil() { + } + + public static SortedMap<Integer, OmMultipartPartInfo> scanParts( + OMMetadataManager omMetadataManager, String uploadId) throws IOException { + SortedMap<Integer, OmMultipartPartInfo> parts = new TreeMap<>(); + OmMultipartPartKey prefix = OmMultipartPartKey.prefix(uploadId); + + try (TableIterator<OmMultipartPartKey, + ? extends Table.KeyValue<OmMultipartPartKey, OmMultipartPartInfo>> + iterator = omMetadataManager.getMultipartPartsTable().iterator(prefix)) { + while (iterator.hasNext()) { + Table.KeyValue<OmMultipartPartKey, OmMultipartPartInfo> kv = + iterator.next(); + if (kv == null) { + continue; + } + OmMultipartPartKey key = kv.getKey(); + if (!uploadId.equals(key.getUploadId())) { + break; + } + if (key.hasPartNumber()) { + parts.put(key.getPartNumber(), kv.getValue()); + } + } + } + + Iterator<Map.Entry<CacheKey<OmMultipartPartKey>, + CacheValue<OmMultipartPartInfo>>> cacheIterator = + omMetadataManager.getMultipartPartsTable().cacheIterator(); + while (cacheIterator.hasNext()) { + Map.Entry<CacheKey<OmMultipartPartKey>, CacheValue<OmMultipartPartInfo>> + cacheEntry = cacheIterator.next(); + OmMultipartPartKey key = cacheEntry.getKey().getCacheKey(); + if (!uploadId.equals(key.getUploadId()) || !key.hasPartNumber()) { + continue; + } + OmMultipartPartInfo value = cacheEntry.getValue().getCacheValue(); + if (value == null) { + parts.remove(key.getPartNumber()); + } else { + parts.put(key.getPartNumber(), value); + } + } + + return parts; Review Comment: Usually, in `OmMetadataReader` `listKeys` and `getMultipartUploadKeys`, we will iterate the cache first before the DB. Any reason here is reversed? Not saying it's wrong, just want to standardize. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCommitPartRequest.java: ########## @@ -236,7 +265,9 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut Map<String, RepeatedOmKeyInfo> keyVersionsToDeleteMap = null; long correctedSpace = omKeyInfo.getReplicatedSize(); - if (null != oldPartKeyInfo) { + if (multipartKeyInfo.getSchemaVersion() + == OmMultipartKeyInfo.LEGACY_SCHEMA_VERSION + && null != oldPartKeyInfo) { Review Comment: Nit: A lot of unnecessary line wraps here which AI agent usually does. We can try to fix unnecessary wraps introduced in this patch as much as possible (within reason). ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCommitPartRequest.java: ########## @@ -370,6 +420,25 @@ private String getMultipartKey(String volumeName, String bucketName, keyName, uploadID); } + private OmMultipartPartKey getMultipartPartKey(String uploadId, + int partNumber) { + return OmMultipartPartKey.of(uploadId, partNumber); + } + + private void validateSplitPartInfo(OmKeyInfo omKeyInfo, int partNumber) + throws OMException { + if (StringUtils.isBlank(omKeyInfo.getMetadata().get(OzoneConsts.ETAG))) { + throw new OMException("Missing ETag for multipart upload part " + + partNumber, OMException.ResultCodes.INVALID_REQUEST); + } + if (omKeyInfo.getKeyLocationVersions() == null + || omKeyInfo.getKeyLocationVersions().isEmpty() + || omKeyInfo.getLatestVersionLocations().getLocationList().isEmpty()) { + throw new OMException("Missing block locations for multipart upload part " + + partNumber, OMException.ResultCodes.INVALID_REQUEST); + } + } Review Comment: Do we have this logic in the schema version 0? OM might allow uploading an empty part (although S3G might fail it early). ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCompleteRequest.java: ########## @@ -612,8 +657,26 @@ private long getMultipartDataSize(String requestedVolume, currentPartCount++; int partNumber = part.getPartNumber(); PartKeyInfo partKeyInfo = partKeyInfoMap.get(partNumber); - MultipartCommitRequestPart requestPart = eTagBasedValidationAvailable ? - eTagBasedValidator.apply(part, partKeyInfo) : partNameBasedValidator.apply(part, partKeyInfo); + OmMultipartPartInfo multipartPartInfo = + multipartPartInfoMap.get(partNumber); + MultipartCommitRequestPart requestPart; + if (multipartPartInfo != null) { + String requestPartId; + String omPartId; + if (eTagBasedValidationAvailable) { + requestPartId = part.getETag(); + omPartId = multipartPartInfo.getETag(); + } else { + requestPartId = part.getPartName(); + omPartId = multipartPartInfo.getPartName(); + } + requestPart = new MultipartCommitRequestPart( + requestPartId, omPartId, StringUtils.equals(requestPartId, omPartId)); Review Comment: Are we going to call the ETag validation here? ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCommitPartRequest.java: ########## @@ -370,6 +420,25 @@ private String getMultipartKey(String volumeName, String bucketName, keyName, uploadID); } + private OmMultipartPartKey getMultipartPartKey(String uploadId, + int partNumber) { + return OmMultipartPartKey.of(uploadId, partNumber); + } Review Comment: Nit: Let's make this inline instead of introducing another method. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/multipart/AbstractS3MultipartAbortResponse.java: ########## @@ -74,24 +75,34 @@ protected void addAbortToBatch( OmMultipartKeyInfo omMultipartKeyInfo = abortInfo .getOmMultipartKeyInfo(); - // Move all the parts to delete table - for (PartKeyInfo partKeyInfo: omMultipartKeyInfo.getPartKeyInfoMap()) { - OmKeyInfo currentKeyPartInfo = - OmKeyInfo.getFromProtobuf(partKeyInfo.getPartKeyInfo()); + if (omMultipartKeyInfo.getSchemaVersion() + == OmMultipartKeyInfo.LEGACY_SCHEMA_VERSION) { + // Move all the parts to delete table + for (PartKeyInfo partKeyInfo: omMultipartKeyInfo.getPartKeyInfoMap()) { + OmKeyInfo currentKeyPartInfo = + OmKeyInfo.getFromProtobuf(partKeyInfo.getPartKeyInfo()); - // TODO: Similar to open key deletion response, we can check if the - // MPU part actually contains blocks, and only move the to - // deletedTable if it does. + // TODO: Similar to open key deletion response, we can check if the + // MPU part actually contains blocks, and only move the to + // deletedTable if it does. - RepeatedOmKeyInfo repeatedOmKeyInfo = OmUtils.prepareKeyForDelete(omBucketInfo.getObjectID(), - currentKeyPartInfo, omMultipartKeyInfo.getUpdateID()); + addPartToDeletedTable(omMetadataManager, batchOperation, + omBucketInfo, abortInfo, currentKeyPartInfo, + omMultipartKeyInfo.getUpdateID()); + } + } else { + for (OmKeyInfo currentKeyPartInfo : + abortInfo.getPartsKeyInfoToDelete()) { + addPartToDeletedTable(omMetadataManager, batchOperation, + omBucketInfo, abortInfo, currentKeyPartInfo, + omMultipartKeyInfo.getUpdateID()); + } - // multi-part key format is volumeName/bucketName/keyName/uploadId - String deleteKey = omMetadataManager.getOzoneDeletePathKey( - currentKeyPartInfo.getObjectID(), abortInfo.getMultipartKey()); - - omMetadataManager.getDeletedTable().putWithBatch(batchOperation, - deleteKey, repeatedOmKeyInfo); + for (OmMultipartPartKey partKey : + abortInfo.getPartsTableKeysToDelete()) { + omMetadataManager.getMultipartPartsTable().deleteWithBatch( + batchOperation, partKey); + } Review Comment: Just a thought, but if it's possible we can use deleteRange in the future to reduce the tombstones. However, deleteRange need to be done carefully since it can invalidate valid DB entries. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/multipart/S3MultipartUploadCommitPartResponse.java: ########## @@ -66,15 +71,19 @@ public class S3MultipartUploadCommitPartResponse extends OmKeyResponse { public S3MultipartUploadCommitPartResponse(@Nonnull OMResponse omResponse, String multipartKey, String openKey, @Nullable OmMultipartKeyInfo omMultipartKeyInfo, + @Nullable OmMultipartPartKey multipartPartKey, + @Nullable OmMultipartPartInfo omMultipartPartInfo, @Nullable Map<String, RepeatedOmKeyInfo> keyToDeleteMap, @Nullable OmKeyInfo openPartKeyInfoToBeDeleted, @Nonnull OmBucketInfo omBucketInfo, long bucketId, @Nonnull BucketLayout bucketLayout) { super(omResponse, bucketLayout); this.multipartKey = multipartKey; + this.multipartPartKey = multipartPartKey; this.openKey = openKey; this.omMultipartKeyInfo = omMultipartKeyInfo; + this.omMultipartPartInfo = omMultipartPartInfo; Review Comment: We might want to add a precondition here that if `mutlipartPartKey` and `omMultipartPartInfo` need to either be both null or both not null. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
