[
https://issues.apache.org/jira/browse/HDDS-10853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Wei-Chiu Chuang updated HDDS-10853:
-----------------------------------
Description:
I noticed two incompatible changes in protobuf fields introduced by HDDS-7509
and HDDS-7952.
HDDS-7509 changed the SnapshotInfo fields snapshotID, pathPreviousSnapshotID
and globalPreviousSnapshotID from string to UUID.
HDDS-7952 overhauled the snapshot diff job db.
Sharing the error stack traces for posterity:
HDDS-7509
{noformat}
2024-05-10 21:52:48,805 ERROR
[main]-org.apache.hadoop.ozone.om.OzoneManagerStarter: OM start failed with
exception
java.lang.IllegalStateException: Failed next()
at
org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:670)
at
org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:619)
at
org.apache.hadoop.ozone.om.SnapshotChainManager.loadFromSnapshotInfoTable(SnapshotChainManager.java:295)
at
org.apache.hadoop.ozone.om.SnapshotChainManager.<init>(SnapshotChainManager.java:66)
at
org.apache.hadoop.ozone.om.OmMetadataManagerImpl.start(OmMetadataManagerImpl.java:558)
at
org.apache.hadoop.ozone.om.OmMetadataManagerImpl.<init>(OmMetadataManagerImpl.java:335)
at
org.apache.hadoop.ozone.om.OzoneManager.instantiateServices(OzoneManager.java:794)
at org.apache.hadoop.ozone.om.OzoneManager.<init>(OzoneManager.java:674)
at
org.apache.hadoop.ozone.om.OzoneManager.createOm(OzoneManager.java:759)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter$OMStarterHelper.start(OzoneManagerStarter.java:189)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.startOm(OzoneManagerStarter.java:86)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.call(OzoneManagerStarter.java:74)
at org.apache.hadoop.hdds.cli.GenericCli.call(GenericCli.java:38)
at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
at picocli.CommandLine.access$1300(CommandLine.java:145)
at
picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
at
picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
at picocli.CommandLine.execute(CommandLine.java:2078)
at org.apache.hadoop.hdds.cli.GenericCli.execute(GenericCli.java:100)
at org.apache.hadoop.hdds.cli.GenericCli.run(GenericCli.java:91)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.main(OzoneManagerStarter.java:58)
Caused by: com.google.protobuf.InvalidProtocolBufferException: While parsing a
protocol message, the input ended unexpectedly in the middle of a field. This
could mean either than the input has been truncated or that an embedded message
misreported its own length.
at
com.google.protobuf.InvalidProtocolBufferException.truncatedMessage(InvalidProtocolBufferException.java:70)
at
com.google.protobuf.CodedInputStream.readRawBytes(CodedInputStream.java:789)
at
com.google.protobuf.CodedInputStream.readBytes(CodedInputStream.java:329)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:484)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:461)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:579)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:280)
at
com.google.protobuf.CodedInputStream.readGroup(CodedInputStream.java:240)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:488)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:461)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:579)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:280)
at
com.google.protobuf.CodedInputStream.readGroup(CodedInputStream.java:240)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:488)
at
com.google.protobuf.GeneratedMessage.parseUnknownField(GeneratedMessage.java:193)
at
org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID.<init>(HddsProtos.java:1253)
at
org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID.<init>(HddsProtos.java:1211)
at
org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID$1.parsePartialFrom(HddsProtos.java:1299)
at
org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID$1.parsePartialFrom(HddsProtos.java:1294)
at
com.google.protobuf.CodedInputStream.readMessage(CodedInputStream.java:309)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo.<init>(OzoneManagerProtocolProtos.java)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo.<init>(OzoneManagerProtocolProtos.java)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo$1.parsePartialFrom(OzoneManagerProtocolProtos.java)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo$1.parsePartialFrom(OzoneManagerProtocolProtos.java)
at
com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:217)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:223)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:49)
at
org.apache.hadoop.hdds.utils.db.Proto2Codec.fromCodecBuffer(Proto2Codec.java:89)
at
org.apache.hadoop.hdds.utils.db.Proto2Codec.fromCodecBuffer(Proto2Codec.java:35)
at
org.apache.hadoop.hdds.utils.db.DelegatedCodec.fromCodecBuffer(DelegatedCodec.java:91)
at
org.apache.hadoop.hdds.utils.db.TypedTable$1.convert(TypedTable.java:587)
at
org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:668)
... 22 more
{noformat}
HDDS-7952
{noformat}
OM start failed with exception
java.lang.RuntimeException: com.fasterxml.jackson.core.JsonParseException:
Unexpected character ('-' (code 45)): Expected space separating root-level
values
at [Source: (byte[])"128966e2-ebe8-4ff1-88c2-a3b637da626c"; line: 1, column:
10]
at
org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:146)
at
org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:1)
at
org.apache.hadoop.ozone.om.snapshot.SnapshotDiffManager.loadJobsOnStartUp(SnapshotDiffManager.java:1627)
at
org.apache.hadoop.ozone.om.snapshot.SnapshotDiffManager.<init>(SnapshotDiffManager.java:281)
at
org.apache.hadoop.ozone.om.OmSnapshotManager.<init>(OmSnapshotManager.java:278)
at
org.apache.hadoop.ozone.om.OzoneManager.instantiateServices(OzoneManager.java:849)
at org.apache.hadoop.ozone.om.OzoneManager.<init>(OzoneManager.java:676)
at
org.apache.hadoop.ozone.om.OzoneManager.createOm(OzoneManager.java:761)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter$OMStarterHelper.start(OzoneManagerStarter.java:189)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.startOm(OzoneManagerStarter.java:86)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.call(OzoneManagerStarter.java:74)
at org.apache.hadoop.hdds.cli.GenericCli.call(GenericCli.java:38)
at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
at picocli.CommandLine.access$1300(CommandLine.java:145)
at
picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
at
picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
at picocli.CommandLine.execute(CommandLine.java:2078)
at org.apache.hadoop.hdds.cli.GenericCli.execute(GenericCli.java:100)
at org.apache.hadoop.hdds.cli.GenericCli.run(GenericCli.java:91)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.main(OzoneManagerStarter.java:58)
Caused by: com.fasterxml.jackson.core.JsonParseException: Unexpected character
('-' (code 45)): Expected space separating root-level values
at [Source: (byte[])"128966e2-ebe8-4ff1-88c2-a3b637da626c"; line: 1, column:
10]
at
com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:2391)
at
com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:735)
at
com.fasterxml.jackson.core.base.ParserMinimalBase._reportUnexpectedChar(ParserMinimalBase.java:659)
at
com.fasterxml.jackson.core.base.ParserMinimalBase._reportMissingRootWS(ParserMinimalBase.java:707)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser._verifyRootSpace(UTF8StreamJsonParser.java:1734)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser._parseFloat(UTF8StreamJsonParser.java:1696)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser._parsePosNumber(UTF8StreamJsonParser.java:1467)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser._nextTokenNotInObject(UTF8StreamJsonParser.java:900)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser.nextToken(UTF8StreamJsonParser.java:794)
at
com.fasterxml.jackson.databind.ObjectMapper._initForReading(ObjectMapper.java:4761)
at
com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4667)
at
com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3690)
at
org.apache.hadoop.ozone.om.helpers.SnapshotDiffJob$SnapshotDiffJobCodec.fromPersistedFormat(SnapshotDiffJob.java:273)
at
org.apache.hadoop.ozone.om.helpers.SnapshotDiffJob$SnapshotDiffJobCodec.fromPersistedFormat(SnapshotDiffJob.java:257)
at
org.apache.hadoop.hdds.utils.db.CodecRegistry.asObject(CodecRegistry.java:101)
at
org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:143)
... 21 more
{noformat}
Ozone snapshot was released in Apache Ozone 1.4.0 and both changes were made in
1.4.0 only, so they do not impact users on official releases. However, community
members relying on the Ozone master branch should watch out (I am aware of a few
companies rebasing on the master branch).
I made an offline conversion tool to repair the broken OM DB. I will polish it a
bit and post a PR. Maybe we can have a separate repo for repair tools.
was:
I noticed two incompatible changes in protobuf fields introduced by HDDS-7509
and HDDS-7952.
HDDS-7509 changed the SnapshotInfo fields snapshotID, pathPreviousSnapshotID
and globalPreviousSnapshotID from string to UUID.
HDDS-7952 overhauled the snapshot diff job db.
Sharing the error stack traces for posterity:
HDDS-7509
{noformat}
2024-05-10 21:52:48,805 ERROR
[main]-org.apache.hadoop.ozone.om.OzoneManagerStarter: OM start failed with
exception
java.lang.IllegalStateException: Failed next()
at
org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:670)
at
org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:619)
at
org.apache.hadoop.ozone.om.SnapshotChainManager.loadFromSnapshotInfoTable(SnapshotChainManager.java:295)
at
org.apache.hadoop.ozone.om.SnapshotChainManager.<init>(SnapshotChainManager.java:66)
at
org.apache.hadoop.ozone.om.OmMetadataManagerImpl.start(OmMetadataManagerImpl.java:558)
at
org.apache.hadoop.ozone.om.OmMetadataManagerImpl.<init>(OmMetadataManagerImpl.java:335)
at
org.apache.hadoop.ozone.om.OzoneManager.instantiateServices(OzoneManager.java:794)
at org.apache.hadoop.ozone.om.OzoneManager.<init>(OzoneManager.java:674)
at
org.apache.hadoop.ozone.om.OzoneManager.createOm(OzoneManager.java:759)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter$OMStarterHelper.start(OzoneManagerStarter.java:189)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.startOm(OzoneManagerStarter.java:86)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.call(OzoneManagerStarter.java:74)
at org.apache.hadoop.hdds.cli.GenericCli.call(GenericCli.java:38)
at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
at picocli.CommandLine.access$1300(CommandLine.java:145)
at
picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
at
picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
at picocli.CommandLine.execute(CommandLine.java:2078)
at org.apache.hadoop.hdds.cli.GenericCli.execute(GenericCli.java:100)
at org.apache.hadoop.hdds.cli.GenericCli.run(GenericCli.java:91)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.main(OzoneManagerStarter.java:58)
Caused by: com.google.protobuf.InvalidProtocolBufferException: While parsing a
protocol message, the input ended unexpectedly in the middle of a field. This
could mean either than the input has been truncated or that an embedded message
misreported its own length.
at
com.google.protobuf.InvalidProtocolBufferException.truncatedMessage(InvalidProtocolBufferException.java:70)
at
com.google.protobuf.CodedInputStream.readRawBytes(CodedInputStream.java:789)
at
com.google.protobuf.CodedInputStream.readBytes(CodedInputStream.java:329)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:484)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:461)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:579)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:280)
at
com.google.protobuf.CodedInputStream.readGroup(CodedInputStream.java:240)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:488)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:461)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:579)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:280)
at
com.google.protobuf.CodedInputStream.readGroup(CodedInputStream.java:240)
at
com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:488)
at
com.google.protobuf.GeneratedMessage.parseUnknownField(GeneratedMessage.java:193)
at
org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID.<init>(HddsProtos.java:1253)
at
org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID.<init>(HddsProtos.java:1211)
at
org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID$1.parsePartialFrom(HddsProtos.java:1299)
at
org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID$1.parsePartialFrom(HddsProtos.java:1294)
at
com.google.protobuf.CodedInputStream.readMessage(CodedInputStream.java:309)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo.<init>(OzoneManagerProtocolProtos.java)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo.<init>(OzoneManagerProtocolProtos.java)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo$1.parsePartialFrom(OzoneManagerProtocolProtos.java)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo$1.parsePartialFrom(OzoneManagerProtocolProtos.java)
at
com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:217)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:223)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:49)
at
org.apache.hadoop.hdds.utils.db.Proto2Codec.fromCodecBuffer(Proto2Codec.java:89)
at
org.apache.hadoop.hdds.utils.db.Proto2Codec.fromCodecBuffer(Proto2Codec.java:35)
at
org.apache.hadoop.hdds.utils.db.DelegatedCodec.fromCodecBuffer(DelegatedCodec.java:91)
at
org.apache.hadoop.hdds.utils.db.TypedTable$1.convert(TypedTable.java:587)
at
org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:668)
... 22 more
{noformat}
HDDS-7952
{noformat}
OM start failed with exception
java.lang.RuntimeException: com.fasterxml.jackson.core.JsonParseException:
Unexpected character ('-' (code 45)): Expected space separating root-level
values
at [Source: (byte[])"128966e2-ebe8-4ff1-88c2-a3b637da626c"; line: 1, column:
10]
at
org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:146)
at
org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:1)
at
org.apache.hadoop.ozone.om.snapshot.SnapshotDiffManager.loadJobsOnStartUp(SnapshotDiffManager.java:1627)
at
org.apache.hadoop.ozone.om.snapshot.SnapshotDiffManager.<init>(SnapshotDiffManager.java:281)
at
org.apache.hadoop.ozone.om.OmSnapshotManager.<init>(OmSnapshotManager.java:278)
at
org.apache.hadoop.ozone.om.OzoneManager.instantiateServices(OzoneManager.java:849)
at org.apache.hadoop.ozone.om.OzoneManager.<init>(OzoneManager.java:676)
at
org.apache.hadoop.ozone.om.OzoneManager.createOm(OzoneManager.java:761)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter$OMStarterHelper.start(OzoneManagerStarter.java:189)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.startOm(OzoneManagerStarter.java:86)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.call(OzoneManagerStarter.java:74)
at org.apache.hadoop.hdds.cli.GenericCli.call(GenericCli.java:38)
at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
at picocli.CommandLine.access$1300(CommandLine.java:145)
at
picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
at
picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
at picocli.CommandLine.execute(CommandLine.java:2078)
at org.apache.hadoop.hdds.cli.GenericCli.execute(GenericCli.java:100)
at org.apache.hadoop.hdds.cli.GenericCli.run(GenericCli.java:91)
at
org.apache.hadoop.ozone.om.OzoneManagerStarter.main(OzoneManagerStarter.java:58)
Caused by: com.fasterxml.jackson.core.JsonParseException: Unexpected character
('-' (code 45)): Expected space separating root-level values
at [Source: (byte[])"128966e2-ebe8-4ff1-88c2-a3b637da626c"; line: 1, column:
10]
at
com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:2391)
at
com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:735)
at
com.fasterxml.jackson.core.base.ParserMinimalBase._reportUnexpectedChar(ParserMinimalBase.java:659)
at
com.fasterxml.jackson.core.base.ParserMinimalBase._reportMissingRootWS(ParserMinimalBase.java:707)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser._verifyRootSpace(UTF8StreamJsonParser.java:1734)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser._parseFloat(UTF8StreamJsonParser.java:1696)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser._parsePosNumber(UTF8StreamJsonParser.java:1467)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser._nextTokenNotInObject(UTF8StreamJsonParser.java:900)
at
com.fasterxml.jackson.core.json.UTF8StreamJsonParser.nextToken(UTF8StreamJsonParser.java:794)
at
com.fasterxml.jackson.databind.ObjectMapper._initForReading(ObjectMapper.java:4761)
at
com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4667)
at
com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3690)
at
org.apache.hadoop.ozone.om.helpers.SnapshotDiffJob$SnapshotDiffJobCodec.fromPersistedFormat(SnapshotDiffJob.java:273)
at
org.apache.hadoop.ozone.om.helpers.SnapshotDiffJob$SnapshotDiffJobCodec.fromPersistedFormat(SnapshotDiffJob.java:257)
at
org.apache.hadoop.hdds.utils.db.CodecRegistry.asObject(CodecRegistry.java:101)
at
org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:143)
... 21 more
{noformat}
Ozone snapshot was released in Apache Ozone 1.4.0 and both changes were made in
1.4.0 only. But community members relying on Ozone master branch should watch
out ( I am aware of a few companies rebasing on master branch).
Made an offline conversion tool to repair the broken OM DB. Will polish it a
bit and post a PR. Maybe we can have a separate repo for repair tools.
> Snapshot incompatible protobuf changes
> --------------------------------------
>
> Key: HDDS-10853
> URL: https://issues.apache.org/jira/browse/HDDS-10853
> Project: Apache Ozone
> Issue Type: Bug
> Affects Versions: 1.4.0
> Reporter: Wei-Chiu Chuang
> Priority: Major
>
> I noticed two incompatible changes in protobuf fields introduced by HDDS-7509
> and HDDS-7952.
>
> HDDS-7509 changed the SnapshotInfo fields snapshotID, pathPreviousSnapshotID
> and globalPreviousSnapshotID from string to UUID.
> HDDS-7952 overhauled the snapshot diff job db.
>
> Sharing the error stack traces for posterity:
> HDDS-7509
> {noformat}
> 2024-05-10 21:52:48,805 ERROR
> [main]-org.apache.hadoop.ozone.om.OzoneManagerStarter: OM start failed with
> exception
> java.lang.IllegalStateException: Failed next()
> at
> org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:670)
> at
> org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:619)
> at
> org.apache.hadoop.ozone.om.SnapshotChainManager.loadFromSnapshotInfoTable(SnapshotChainManager.java:295)
> at
> org.apache.hadoop.ozone.om.SnapshotChainManager.<init>(SnapshotChainManager.java:66)
> at
> org.apache.hadoop.ozone.om.OmMetadataManagerImpl.start(OmMetadataManagerImpl.java:558)
> at
> org.apache.hadoop.ozone.om.OmMetadataManagerImpl.<init>(OmMetadataManagerImpl.java:335)
> at
> org.apache.hadoop.ozone.om.OzoneManager.instantiateServices(OzoneManager.java:794)
> at
> org.apache.hadoop.ozone.om.OzoneManager.<init>(OzoneManager.java:674)
> at
> org.apache.hadoop.ozone.om.OzoneManager.createOm(OzoneManager.java:759)
> at
> org.apache.hadoop.ozone.om.OzoneManagerStarter$OMStarterHelper.start(OzoneManagerStarter.java:189)
> at
> org.apache.hadoop.ozone.om.OzoneManagerStarter.startOm(OzoneManagerStarter.java:86)
> at
> org.apache.hadoop.ozone.om.OzoneManagerStarter.call(OzoneManagerStarter.java:74)
> at org.apache.hadoop.hdds.cli.GenericCli.call(GenericCli.java:38)
> at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
> at picocli.CommandLine.access$1300(CommandLine.java:145)
> at
> picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
> at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
> at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
> at
> picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
> at picocli.CommandLine.execute(CommandLine.java:2078)
> at org.apache.hadoop.hdds.cli.GenericCli.execute(GenericCli.java:100)
> at org.apache.hadoop.hdds.cli.GenericCli.run(GenericCli.java:91)
> at
> org.apache.hadoop.ozone.om.OzoneManagerStarter.main(OzoneManagerStarter.java:58)
> Caused by: com.google.protobuf.InvalidProtocolBufferException: While parsing
> a protocol message, the input ended unexpectedly in the middle of a field.
> This could mean either than the input has been truncated or that an embedded
> message misreported its own length.
> at
> com.google.protobuf.InvalidProtocolBufferException.truncatedMessage(InvalidProtocolBufferException.java:70)
> at
> com.google.protobuf.CodedInputStream.readRawBytes(CodedInputStream.java:789)
> at
> com.google.protobuf.CodedInputStream.readBytes(CodedInputStream.java:329)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:484)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:461)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:579)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:280)
> at
> com.google.protobuf.CodedInputStream.readGroup(CodedInputStream.java:240)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:488)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:461)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:579)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:280)
> at
> com.google.protobuf.CodedInputStream.readGroup(CodedInputStream.java:240)
> at
> com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:488)
> at
> com.google.protobuf.GeneratedMessage.parseUnknownField(GeneratedMessage.java:193)
> at
> org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID.<init>(HddsProtos.java:1253)
> at
> org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID.<init>(HddsProtos.java:1211)
> at
> org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID$1.parsePartialFrom(HddsProtos.java:1299)
> at
> org.apache.hadoop.hdds.protocol.proto.HddsProtos$UUID$1.parsePartialFrom(HddsProtos.java:1294)
> at
> com.google.protobuf.CodedInputStream.readMessage(CodedInputStream.java:309)
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo.<init>(OzoneManagerProtocolProtos.java)
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo.<init>(OzoneManagerProtocolProtos.java)
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo$1.parsePartialFrom(OzoneManagerProtocolProtos.java)
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$SnapshotInfo$1.parsePartialFrom(OzoneManagerProtocolProtos.java)
> at
> com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
> at
> com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:217)
> at
> com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:223)
> at
> com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:49)
> at
> org.apache.hadoop.hdds.utils.db.Proto2Codec.fromCodecBuffer(Proto2Codec.java:89)
> at
> org.apache.hadoop.hdds.utils.db.Proto2Codec.fromCodecBuffer(Proto2Codec.java:35)
> at
> org.apache.hadoop.hdds.utils.db.DelegatedCodec.fromCodecBuffer(DelegatedCodec.java:91)
> at
> org.apache.hadoop.hdds.utils.db.TypedTable$1.convert(TypedTable.java:587)
> at
> org.apache.hadoop.hdds.utils.db.TypedTable$RawIterator.next(TypedTable.java:668)
> ... 22 more
> {noformat}
>
> HDDS-7952
> {noformat}
> OM start failed with exception
> java.lang.RuntimeException: com.fasterxml.jackson.core.JsonParseException:
> Unexpected character ('-' (code 45)): Expected space separating root-level
> values
> at [Source: (byte[])"128966e2-ebe8-4ff1-88c2-a3b637da626c"; line: 1, column:
> 10]
> at
> org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:146)
> at
> org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:1)
> at
> org.apache.hadoop.ozone.om.snapshot.SnapshotDiffManager.loadJobsOnStartUp(SnapshotDiffManager.java:1627)
> at
> org.apache.hadoop.ozone.om.snapshot.SnapshotDiffManager.<init>(SnapshotDiffManager.java:281)
> at
> org.apache.hadoop.ozone.om.OmSnapshotManager.<init>(OmSnapshotManager.java:278)
> at
> org.apache.hadoop.ozone.om.OzoneManager.instantiateServices(OzoneManager.java:849)
> at org.apache.hadoop.ozone.om.OzoneManager.<init>(OzoneManager.java:676)
> at
> org.apache.hadoop.ozone.om.OzoneManager.createOm(OzoneManager.java:761)
> at
> org.apache.hadoop.ozone.om.OzoneManagerStarter$OMStarterHelper.start(OzoneManagerStarter.java:189)
> at
> org.apache.hadoop.ozone.om.OzoneManagerStarter.startOm(OzoneManagerStarter.java:86)
> at
> org.apache.hadoop.ozone.om.OzoneManagerStarter.call(OzoneManagerStarter.java:74)
> at org.apache.hadoop.hdds.cli.GenericCli.call(GenericCli.java:38)
> at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
> at picocli.CommandLine.access$1300(CommandLine.java:145)
> at
> picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
> at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
> at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
> at
> picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
> at picocli.CommandLine.execute(CommandLine.java:2078)
> at org.apache.hadoop.hdds.cli.GenericCli.execute(GenericCli.java:100)
> at org.apache.hadoop.hdds.cli.GenericCli.run(GenericCli.java:91)
> at
> org.apache.hadoop.ozone.om.OzoneManagerStarter.main(OzoneManagerStarter.java:58)
> Caused by: com.fasterxml.jackson.core.JsonParseException: Unexpected
> character ('-' (code 45)): Expected space separating root-level values
> at [Source: (byte[])"128966e2-ebe8-4ff1-88c2-a3b637da626c"; line: 1, column:
> 10]
> at
> com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:2391)
> at
> com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:735)
> at
> com.fasterxml.jackson.core.base.ParserMinimalBase._reportUnexpectedChar(ParserMinimalBase.java:659)
> at
> com.fasterxml.jackson.core.base.ParserMinimalBase._reportMissingRootWS(ParserMinimalBase.java:707)
> at
> com.fasterxml.jackson.core.json.UTF8StreamJsonParser._verifyRootSpace(UTF8StreamJsonParser.java:1734)
> at
> com.fasterxml.jackson.core.json.UTF8StreamJsonParser._parseFloat(UTF8StreamJsonParser.java:1696)
> at
> com.fasterxml.jackson.core.json.UTF8StreamJsonParser._parsePosNumber(UTF8StreamJsonParser.java:1467)
> at
> com.fasterxml.jackson.core.json.UTF8StreamJsonParser._nextTokenNotInObject(UTF8StreamJsonParser.java:900)
> at
> com.fasterxml.jackson.core.json.UTF8StreamJsonParser.nextToken(UTF8StreamJsonParser.java:794)
> at
> com.fasterxml.jackson.databind.ObjectMapper._initForReading(ObjectMapper.java:4761)
> at
> com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4667)
> at
> com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3690)
> at
> org.apache.hadoop.ozone.om.helpers.SnapshotDiffJob$SnapshotDiffJobCodec.fromPersistedFormat(SnapshotDiffJob.java:273)
> at
> org.apache.hadoop.ozone.om.helpers.SnapshotDiffJob$SnapshotDiffJobCodec.fromPersistedFormat(SnapshotDiffJob.java:257)
> at
> org.apache.hadoop.hdds.utils.db.CodecRegistry.asObject(CodecRegistry.java:101)
> at
> org.apache.hadoop.ozone.om.snapshot.RocksDbPersistentMap$1.next(RocksDbPersistentMap.java:143)
> ... 21 more
> {noformat}
> Ozone snapshot was released in Apache Ozone 1.4.0 and both changes were made
> in 1.4.0 only, so they do not impact users on official releases. However,
> community members relying on the Ozone master branch should watch out (I am
> aware of a few companies rebasing on the master branch).
> I made an offline conversion tool to repair the broken OM DB. I will polish it
> a bit and post a PR. Maybe we can have a separate repo for repair tools.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]