Siyao Meng created HDDS-6261:
--------------------------------
Summary: OM crashes when trying to overwrite a key during upgrade/downgrade
testing
Key: HDDS-6261
URL: https://issues.apache.org/jira/browse/HDDS-6261
Project: Apache Ozone
Issue Type: Bug
Components: Ozone Manager
Affects Versions: 1.3.0
Reporter: Siyao Meng
While working on HDDS-6084 (related to upgrade acceptance testing), [~erose]
and I found that if:
1) a key is created with a new OM version (1.3.0),
2) OM is downgraded to 1.1.0, and
3) an attempt is made to overwrite the key created in (1),
then step 3) will result in all 3 OMs crashing.
The issue seems to have been introduced in the unreleased 1.3.0 branch. The same
issue cannot be reproduced in 1.1.0 to 1.2.0 upgrade/downgrade testing. This
could indicate that some unreleased changes have broken key versioning after the
downgrade. (It could well be an incompatible change; further investigation is needed.)
{code}
om2_1 | 2022-02-03 21:36:15,228 [OM StateMachine ApplyTransaction Thread -
0] ERROR ratis.OzoneManagerStateMachine: Terminating with exit status 1:
Request cmdType: CreateKey
om2_1 | traceID: ""
om2_1 | clientId: "client-72B024AF247D"
om2_1 | userInfo {
om2_1 | userName: "dlfknslnfslf"
om2_1 | remoteAddress: "10.9.0.19"
om2_1 | hostName: "ha_s3g_1.ha_net"
om2_1 | }
om2_1 | version: 1
om2_1 | createKeyRequest {
om2_1 | keyArgs {
om2_1 | volumeName: "s3v"
om2_1 | bucketName: "old1-bucket"
om2_1 | keyName: "key2"
om2_1 | dataSize: 17539
om2_1 | type: RATIS
om2_1 | factor: THREE
om2_1 | keyLocations {
om2_1 | blockID {
om2_1 | containerBlockID {
om2_1 | containerID: 1
om2_1 | localID: 107736214721200128
om2_1 | }
om2_1 | blockCommitSequenceId: 0
om2_1 | }
om2_1 | offset: 0
om2_1 | length: 268435456
om2_1 | createVersion: 0
om2_1 | pipeline {
om2_1 | members {
om2_1 | uuid: "b92bf4c8-3b0c-40b0-bb2b-05b6d3594e13"
om2_1 | ipAddress: "10.9.0.16"
om2_1 | hostName: "ha_dn2_1.ha_net"
om2_1 | ports {
om2_1 | name: "REPLICATION"
om2_1 | value: 9886
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS"
om2_1 | value: 9858
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS_ADMIN"
om2_1 | value: 9857
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS_SERVER"
om2_1 | value: 9856
om2_1 | }
om2_1 | ports {
om2_1 | name: "STANDALONE"
om2_1 | value: 9859
om2_1 | }
om2_1 | networkName: "b92bf4c8-3b0c-40b0-bb2b-05b6d3594e13"
om2_1 | networkLocation: "/default-rack"
om2_1 | persistedOpState: IN_SERVICE
om2_1 | persistedOpStateExpiry: 0
om2_1 | uuid128 {
om2_1 | mostSigBits: -5103716611873029968
om2_1 | leastSigBits: -4959864281830437357
om2_1 | }
om2_1 | }
om2_1 | members {
om2_1 | uuid: "f0b7e615-d4ee-4ec4-a6b5-ec68b82c07e9"
om2_1 | ipAddress: "10.9.0.15"
om2_1 | hostName: "ha_dn1_1.ha_net"
om2_1 | ports {
om2_1 | name: "REPLICATION"
om2_1 | value: 9886
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS"
om2_1 | value: 9858
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS_ADMIN"
om2_1 | value: 9857
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS_SERVER"
om2_1 | value: 9856
om2_1 | }
om2_1 | ports {
om2_1 | name: "STANDALONE"
om2_1 | value: 9859
om2_1 | }
om2_1 | networkName: "f0b7e615-d4ee-4ec4-a6b5-ec68b82c07e9"
om2_1 | networkLocation: "/default-rack"
om2_1 | persistedOpState: IN_SERVICE
om2_1 | persistedOpStateExpiry: 0
om2_1 | uuid128 {
om2_1 | mostSigBits: -1101158602427707708
om2_1 | leastSigBits: -6433976558118238231
om2_1 | }
om2_1 | }
om2_1 | members {
om2_1 | uuid: "c7912312-811d-469d-8c40-c739cd2a1e62"
om2_1 | ipAddress: "10.9.0.17"
om2_1 | hostName: "ha_dn3_1.ha_net"
om2_1 | ports {
om2_1 | name: "REPLICATION"
om2_1 | value: 9886
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS"
om2_1 | value: 9858
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS_ADMIN"
om2_1 | value: 9857
om2_1 | }
om2_1 | ports {
om2_1 | name: "RATIS_SERVER"
om2_1 | value: 9856
om2_1 | }
om2_1 | ports {
om2_1 | name: "STANDALONE"
om2_1 | value: 9859
om2_1 | }
om2_1 | networkName: "c7912312-811d-469d-8c40-c739cd2a1e62"
om2_1 | networkLocation: "/default-rack"
om2_1 | persistedOpState: IN_SERVICE
om2_1 | persistedOpStateExpiry: 0
om2_1 | uuid128 {
om2_1 | mostSigBits: -4066430426156284259
om2_1 | leastSigBits: -8340447458821005726
om2_1 | }
om2_1 | }
om2_1 | state: PIPELINE_OPEN
om2_1 | type: RATIS
om2_1 | factor: THREE
om2_1 | id {
om2_1 | id: "c0b6f272-9a39-4dc3-ada8-c3b833cc6e17"
om2_1 | uuid128 {
om2_1 | mostSigBits: -4560190998638408253
om2_1 | leastSigBits: -5933277313150194153
om2_1 | }
om2_1 | }
om2_1 | leaderID: "f0b7e615-d4ee-4ec4-a6b5-ec68b82c07e9"
om2_1 | creationTimeStamp: 1643924132125
om2_1 | suggestedLeaderID {
om2_1 | mostSigBits: -1101158602427707708
om2_1 | leastSigBits: -6433976558118238231
om2_1 | }
om2_1 | leaderID128 {
om2_1 | mostSigBits: -1101158602427707708
om2_1 | leastSigBits: -6433976558118238231
om2_1 | }
om2_1 | }
om2_1 | partNumber: 0
om2_1 | }
om2_1 | isMultipartKey: false
om2_1 | acls {
om2_1 | type: USER
om2_1 | name: "dlfknslnfslf"
om2_1 | rights: "\200"
om2_1 | aclScope: ACCESS
om2_1 | }
om2_1 | modificationTime: 1643924174840
om2_1 | }
om2_1 | clientID: 107736214722445312
om2_1 | }
om2_1 | failed with exception
om2_1 | java.lang.IllegalArgumentException
om2_1 | at
com.google.common.base.Preconditions.checkArgument(Preconditions.java:128)
om2_1 | at
org.apache.hadoop.ozone.om.helpers.OmKeyInfo.<init>(OmKeyInfo.java:81)
om2_1 | at
org.apache.hadoop.ozone.om.helpers.OmKeyInfo$Builder.build(OmKeyInfo.java:378)
om2_1 | at
org.apache.hadoop.ozone.om.helpers.OmKeyInfo.getFromProtobuf(OmKeyInfo.java:460)
om2_1 | at
org.apache.hadoop.ozone.om.codec.OmKeyInfoCodec.fromPersistedFormat(OmKeyInfoCodec.java:59)
om2_1 | at
org.apache.hadoop.ozone.om.codec.OmKeyInfoCodec.fromPersistedFormat(OmKeyInfoCodec.java:36)
om2_1 | at
org.apache.hadoop.hdds.utils.db.CodecRegistry.asObject(CodecRegistry.java:55)
om2_1 | at
org.apache.hadoop.hdds.utils.db.TypedTable.getFromTableIfExist(TypedTable.java:261)
om2_1 | at
org.apache.hadoop.hdds.utils.db.TypedTable.getIfExist(TypedTable.java:248)
om2_1 | at
org.apache.hadoop.ozone.om.request.key.OMKeyCreateRequest.validateAndUpdateCache(OMKeyCreateRequest.java:236)
om2_1 | at
org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler.handleWriteRequest(OzoneManagerRequestHandler.java:227)
om2_1 | at
org.apache.hadoop.ozone.om.ratis.OzoneManagerStateMachine.runCommand(OzoneManagerStateMachine.java:415)
om2_1 | at
org.apache.hadoop.ozone.om.ratis.OzoneManagerStateMachine.lambda$applyTransaction$1(OzoneManagerStateMachine.java:240)
om2_1 | at
java.base/java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1700)
om2_1 | at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
om2_1 | at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
om2_1 | at java.base/java.lang.Thread.run(Thread.java:834)
om2_1 | 2022-02-03 21:36:15,253 [shutdown-hook-0] INFO
om.OzoneManagerStarter: SHUTDOWN_MSG:
om2_1 | /************************************************************
om2_1 | SHUTDOWN_MSG: Shutting down OzoneManager at a250845831a7/10.9.0.12
om2_1 | ************************************************************/
{code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]