[
https://issues.apache.org/jira/browse/HDDS-12131?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Kohei Sugihara updated HDDS-12131:
----------------------------------
Description:
OM throws a NullPointerException (NPE) while handling
S3MultipartUploadCompleteRequestWithFSO when all of the following conditions
are met:
* Apache Ozone 1.4.1
* The bucket has versioning disabled and FSO enabled
* An existing empty file is overwritten with a non-empty file using multipart upload
Reproduce:
{code:java}
# experiment environment
% cat ~/.aws/config
[default]
s3 =
max_concurrent_requests = 1
multipart_chunksize = 8MB
multipart_threshold = 8MB
# create an empty file and upload it to the cluster
% touch empty-file
% aws s3 --endpoint https://<s3g>/ --profile pfs0 cp ./empty-file s3://ksugihara/
upload: ./empty-file to s3://ksugihara/empty-file
# create a large file (>= 8MB) so that awscli uses multipart upload
% dd if=/dev/urandom of=bin-16m.dat count=16 bs=1M
16+0 records in
16+0 records out
16777216 bytes (17 MB, 16 MiB) copied, 0.0375582 s, 447 MB/s
# upload it
% aws s3 --endpoint https://<s3g>/ --profile pfs0 ls s3://ksugihara/
2025-01-22 17:06:20 1048576 bin-1m.dat
2025-01-24 00:46:12 0 empty-file
% aws s3 --endpoint https://<s3g>/ --profile pfs0 cp ./bin-16m.dat s3://ksugihara/empty-file
upload: ./bin-16m.dat to s3://ksugihara/empty-file
% aws s3 --endpoint https://<s3g>/ --profile pfs0 ls s3://ksugihara/
^[[A^[[B <--- does not respond {code}
OM crashes with NPE:
{code:java}
2025-01-24 01:01:55,816 [om1-OMDoubleBufferFlushThread] ERROR
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer: Terminating with
exit status 2: During flush to DB encountered error in OMDoubleBuffer flush
thread om1-OMDoubleBufferFlushThread when handling OMRequest: cmdType:
CompleteMultiPartUpload
traceID: "2329cf21ba56a55f:16a79975cec73b09:2329cf21ba56a55f:0"
success: true
status: OK
completeMultiPartUploadResponse {
volume: "s3v"
bucket: "ksugihara"
key: "empty-file"
hash: "e2c0bb7321f4613fb122b0a25bafda89-2"
}
java.lang.NullPointerException: Cannot invoke
"org.apache.hadoop.ozone.om.helpers.OmBucketInfo.getVolumeName()" because the
return value of
"org.apache.hadoop.ozone.om.response.s3.multipart.S3MultipartUploadCompleteResponseWithFSO.getOmBucketInfo()"
is null
at
org.apache.hadoop.ozone.om.response.s3.multipart.S3MultipartUploadCompleteResponseWithFSO.addToDBBatch(S3MultipartUploadCompleteResponseWithFSO.java:107)
at
org.apache.hadoop.ozone.om.response.OMClientResponse.checkAndUpdateDB(OMClientResponse.java:75)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.lambda$5(OzoneManagerDoubleBuffer.java:429)
at
org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:184)
at
org.apache.hadoop.hdds.tracing.TracingUtil.executeAsChildSpan(TracingUtil.java:208)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.addToBatchWithTrace(OzoneManagerDoubleBuffer.java:262)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.addToBatch(OzoneManagerDoubleBuffer.java:428)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushBatch(OzoneManagerDoubleBuffer.java:355)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushCurrentBuffer(OzoneManagerDoubleBuffer.java:334)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushTransactions(OzoneManagerDoubleBuffer.java:299)
at java.base/java.lang.Thread.run(Thread.java:833)
2025-01-24 01:01:55,819 [shutdown-hook-0] INFO
org.apache.hadoop.ozone.om.OzoneManagerStarter: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down OzoneManager at <om-server>
************************************************************/
2025-01-24 01:01:55,832 [main] INFO
org.apache.hadoop.hdds.server.http.HttpServer2: Excluded Cipher List:
^TLS_DHE.*$,^.*SHA$,^TLS_RSA_WITH.*$,^.*MD5.*$,^TLS_DH_.*$,^.*RC4.*$,^.*CCM.*$
2025-01-24 01:01:55,837 [main] INFO
org.apache.hadoop.hdds.server.http.BaseHttpServer: HTTP server of ozoneManager
uses base directory /data/ozone/metadata/webserver
2025-01-24 01:01:57,454 [main] INFO
org.apache.hadoop.ozone.om.OzoneManagerStarter: STARTUP_MSG:
/************************************************************ {code}
was:
OM throws NullPointerException (NPE) on S3MultipartUploadCompleteRequestWithFSO
when it satisfies the following conditions:
* Apache Ozone 1.4.1
* Versioning disabled && FSO enabled buckets
* Overwrite an empty file with another non-zero file using multipart upload
Reproduce:
{code:java}
# experiment environment
% cat ~/.aws/config
[default]
s3 =
max_concurrent_requests = 1
multipart_chunksize = 8MB
multipart_threshold = 8MB
# create an empty file and upload it to the cluster
% touch empty-file
% aws s3 --endpoint https://<s3g>/ --profile pfs0 cp ./empty-file
s3://ksugihara/
upload: ./empty-file to s3://ksugihara/empty-file
# create some large files (>= 8MB) to execute multipart upload via awscli
% dd if=/dev/urandom of=bin-16m.dat count=16 bs=1M
16+0 records in
16+0 records out
16777216 bytes (17 MB, 16 MiB) copied, 0.0375582 s, 447 MB/s
# upload it
% aws s3 --endpoint https://<s3g>/ --profile pfs0 ls s3://ksugihara/
2025-01-22 17:06:20 1048576 bin-1m.dat
2025-01-24 00:46:12 0 empty-file
% aws s3 --endpoint https://<s3g>/ --profile pfs0 cp ./bin-16m.dat
s3://ksugihara/empty-file
upload: ./bin-16m.dat to s3://ksugihara/empty-file
% aws s3 --endpoint https://<s3g>/ --profile pfs0 ls s3://ksugihara/
^[[A^[[B <--- not respond {code}
OM crashes with NPE:
{code:java}
2025-01-24 01:01:55,816 [om1-OMDoubleBufferFlushThread] ERROR
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer: Terminating with
exit status 2: During flush to DB encountered error in OMDoubleBuffer flush
thread om1-OMDoubleBufferFlushThread when handling OMRequest: cmdType:
CompleteMultiPartUpload
traceID: "2329cf21ba56a55f:16a79975cec73b09:2329cf21ba56a55f:0"
success: true
status: OK
completeMultiPartUploadResponse {
volume: "s3v"
bucket: "ksugihara"
key: "empty-file"
hash: "e2c0bb7321f4613fb122b0a25bafda89-2"
}
java.lang.NullPointerException: Cannot invoke
"org.apache.hadoop.ozone.om.helpers.OmBucketInfo.getVolumeName()" because the
return value of
"org.apache.hadoop.ozone.om.response.s3.multipart.S3MultipartUploadCompleteResponseWithFSO.getOmBucketInfo()"
is null
at
org.apache.hadoop.ozone.om.response.s3.multipart.S3MultipartUploadCompleteResponseWithFSO.addToDBBatch(S3MultipartUploadCompleteResponseWithFSO.java:107)
at
org.apache.hadoop.ozone.om.response.OMClientResponse.checkAndUpdateDB(OMClientResponse.java:75)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.lambda$5(OzoneManagerDoubleBuffer.java:429)
at
org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:184)
at
org.apache.hadoop.hdds.tracing.TracingUtil.executeAsChildSpan(TracingUtil.java:208)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.addToBatchWithTrace(OzoneManagerDoubleBuffer.java:262)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.addToBatch(OzoneManagerDoubleBuffer.java:428)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushBatch(OzoneManagerDoubleBuffer.java:355)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushCurrentBuffer(OzoneManagerDoubleBuffer.java:334)
at
org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushTransactions(OzoneManagerDoubleBuffer.java:299)
at java.base/java.lang.Thread.run(Thread.java:833)
2025-01-24 01:01:55,819 [shutdown-hook-0] INFO
org.apache.hadoop.ozone.om.OzoneManagerStarter: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down OzoneManager at <om-server>
************************************************************/
2025-01-24 01:01:55,832 [main] INFO
org.apache.hadoop.hdds.server.http.HttpServer2: Excluded Cipher List:
^TLS_DHE.*$,^.*SHA$,^TLS_RSA_WITH.*$,^.*MD5.*$,^TLS_DH_.*$,^.*RC4.*$,^.*CCM.*$
2025-01-24 01:01:55,837 [main] INFO
org.apache.hadoop.hdds.server.http.BaseHttpServer: HTTP server of ozoneManager
uses base directory /data/ozone/metadata/webserver
2025-01-24 01:01:57,454 [main] INFO
org.apache.hadoop.ozone.om.OzoneManagerStarter: STARTUP_MSG:
/************************************************************ {code}
> Overwriting an empty file with multipart upload causes NPE
> ----------------------------------------------------------
>
> Key: HDDS-12131
> URL: https://issues.apache.org/jira/browse/HDDS-12131
> Project: Apache Ozone
> Issue Type: Bug
> Components: OM
> Affects Versions: 1.4.1
> Environment: Apache Ozone 1.4.1 with FSO-enabled & non-versioning
> bucket
> Reporter: Kohei Sugihara
> Priority: Major
>
> OM throws a NullPointerException (NPE) while handling
> S3MultipartUploadCompleteRequestWithFSO when all of the following
> conditions are met:
> * Apache Ozone 1.4.1
> * The bucket has versioning disabled and FSO enabled
> * An existing empty file is overwritten with a non-empty file using multipart upload
> Reproduce:
> {code:java}
> # experiment environment
> % cat ~/.aws/config
> [default]
> s3 =
> max_concurrent_requests = 1
> multipart_chunksize = 8MB
> multipart_threshold = 8MB
> # create an empty file and upload it to the cluster
> % touch empty-file
> % aws s3 --endpoint https://<s3g>/ --profile pfs0 cp ./empty-file s3://ksugihara/
> upload: ./empty-file to s3://ksugihara/empty-file
> # create a large file (>= 8MB) so that awscli uses multipart upload
> % dd if=/dev/urandom of=bin-16m.dat count=16 bs=1M
> 16+0 records in
> 16+0 records out
> 16777216 bytes (17 MB, 16 MiB) copied, 0.0375582 s, 447 MB/s
> # upload it
> % aws s3 --endpoint https://<s3g>/ --profile pfs0 ls s3://ksugihara/
> 2025-01-22 17:06:20 1048576 bin-1m.dat
> 2025-01-24 00:46:12 0 empty-file
> % aws s3 --endpoint https://<s3g>/ --profile pfs0 cp ./bin-16m.dat s3://ksugihara/empty-file
> upload: ./bin-16m.dat to s3://ksugihara/empty-file
> % aws s3 --endpoint https://<s3g>/ --profile pfs0 ls s3://ksugihara/
> ^[[A^[[B <--- does not respond {code}
> OM crashes with NPE:
> {code:java}
> 2025-01-24 01:01:55,816 [om1-OMDoubleBufferFlushThread] ERROR
> org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer: Terminating with
> exit status 2: During flush to DB encountered error in OMDoubleBuffer flush
> thread om1-OMDoubleBufferFlushThread when handling OMRequest: cmdType:
> CompleteMultiPartUpload
> traceID: "2329cf21ba56a55f:16a79975cec73b09:2329cf21ba56a55f:0"
> success: true
> status: OK
> completeMultiPartUploadResponse {
> volume: "s3v"
> bucket: "ksugihara"
> key: "empty-file"
> hash: "e2c0bb7321f4613fb122b0a25bafda89-2"
> }
> java.lang.NullPointerException: Cannot invoke
> "org.apache.hadoop.ozone.om.helpers.OmBucketInfo.getVolumeName()" because the
> return value of
> "org.apache.hadoop.ozone.om.response.s3.multipart.S3MultipartUploadCompleteResponseWithFSO.getOmBucketInfo()"
> is null
> at
> org.apache.hadoop.ozone.om.response.s3.multipart.S3MultipartUploadCompleteResponseWithFSO.addToDBBatch(S3MultipartUploadCompleteResponseWithFSO.java:107)
> at
> org.apache.hadoop.ozone.om.response.OMClientResponse.checkAndUpdateDB(OMClientResponse.java:75)
> at
> org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.lambda$5(OzoneManagerDoubleBuffer.java:429)
> at
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:184)
> at
> org.apache.hadoop.hdds.tracing.TracingUtil.executeAsChildSpan(TracingUtil.java:208)
> at
> org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.addToBatchWithTrace(OzoneManagerDoubleBuffer.java:262)
> at
> org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.addToBatch(OzoneManagerDoubleBuffer.java:428)
> at
> org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushBatch(OzoneManagerDoubleBuffer.java:355)
> at
> org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushCurrentBuffer(OzoneManagerDoubleBuffer.java:334)
> at
> org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushTransactions(OzoneManagerDoubleBuffer.java:299)
> at java.base/java.lang.Thread.run(Thread.java:833)
> 2025-01-24 01:01:55,819 [shutdown-hook-0] INFO
> org.apache.hadoop.ozone.om.OzoneManagerStarter: SHUTDOWN_MSG:
> /************************************************************
> SHUTDOWN_MSG: Shutting down OzoneManager at <om-server>
> ************************************************************/
> 2025-01-24 01:01:55,832 [main] INFO
> org.apache.hadoop.hdds.server.http.HttpServer2: Excluded Cipher List:
> ^TLS_DHE.*$,^.*SHA$,^TLS_RSA_WITH.*$,^.*MD5.*$,^TLS_DH_.*$,^.*RC4.*$,^.*CCM.*$
> 2025-01-24 01:01:55,837 [main] INFO
> org.apache.hadoop.hdds.server.http.BaseHttpServer: HTTP server of
> ozoneManager uses base directory /data/ozone/metadata/webserver
> 2025-01-24 01:01:57,454 [main] INFO
> org.apache.hadoop.ozone.om.OzoneManagerStarter: STARTUP_MSG:
> /************************************************************ {code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]