siddhantsangwan opened a new pull request, #9790: URL: https://github.com/apache/ozone/pull/9790
## What changes were proposed in this pull request? In the Datanode for Schema V3, we have one RocksDB per volume. If the disk this DB is using gets full, RocksDB operations will fail with an exception and Ozone handles this by closing the DB. This means not only writes, but reads will also fail in this situation. For example, I simulated a disk full in ozone and captured the db behaviour (grep for `IOError(NoSpace)` and `No space left on device`): ``` datanode4-1 | 2026-02-02 13:33:49,623 [1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4] WARN keyvalue.KeyValueHandler: Operation: PutBlock , Trace ID: , Message: Put Key failed , Result: IO_EXCEPTION , StorageContainerException Occurred. datanode4-1 | org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: Put Key failed datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:689) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:303) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:370) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.lambda$dispatch$1(HddsDispatcher.java:195) datanode4-1 | at org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:89) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(HddsDispatcher.java:194) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061) datanode4-1 | 
at org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121) datanode4-1 | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38) datanode4-1 | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78) datanode4-1 | at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572) datanode4-1 | at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) datanode4-1 | at java.base/java.lang.Thread.run(Thread.java:1583) datanode4-1 | Caused by: org.apache.hadoop.hdds.utils.db.RocksDatabaseException: IOError(NoSpace): RocksDatabase[/data/metadata/db/CID-5544c724-d702-4c2b-b8c5-948d6c36bde0/DS-58c1b346-6d46-4b1f-a641-2a149f3a6292/container.db]: Failed to batchWrite datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.toRocksDatabaseException(RocksDatabase.java:112) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:794) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:799) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBBatchOperation.commit(RDBBatchOperation.java:417) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBStore.commitBatchOperation(RDBStore.java:283) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:259) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680) datanode4-1 | ... 
16 more datanode4-1 | Caused by: org.rocksdb.RocksDBException: While appending to file: /data/metadata/db/CID-5544c724-d702-4c2b-b8c5-948d6c36bde0/DS-58c1b346-6d46-4b1f-a641-2a149f3a6292/container.db/000004.log: No space left on device datanode4-1 | at org.rocksdb.RocksDB.write0(Native Method) datanode4-1 | at org.rocksdb.RocksDB.write(RocksDB.java:1708) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:791) datanode4-1 | ... 22 more datanode4-1 | 2026-02-02 13:33:49,630 [1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4] WARN keyvalue.KeyValueContainer: Marked container UNHEALTHY from OPEN: KeyValueContainerData #1 (UNHEALTHY, non-empty, ri=0, origin=[dn_1003db69-5a14-4a16-b901-5dffd2d17036, pipeline_d770c349-2c11-42b6-8c02-ba482d730739]) datanode4-1 | 2026-02-02 13:33:49,632 [1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4] ERROR keyvalue.KeyValueHandler: Cannot create container checksum for container 1 , Exception: datanode4-1 | org.apache.hadoop.hdds.utils.db.RocksDatabaseException: Rocks Database is closed datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.acquire(RocksDatabase.java:432) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.newIterator(RocksDatabase.java:780) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBTable.iterator(RDBTable.java:223) datanode4-1 | at org.apache.hadoop.hdds.utils.db.TypedTable.newCodecBufferTableIterator(TypedTable.java:508) datanode4-1 | at org.apache.hadoop.hdds.utils.db.TypedTable.iterator(TypedTable.java:404) datanode4-1 | at org.apache.hadoop.hdds.utils.db.Table.iterator(Table.java:152) datanode4-1 | at org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl.getBlockIterator(DatanodeStoreSchemaThreeImpl.java:91) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.updateAndGetContainerChecksumFromMetadata(KeyValueHandler.java:1456) 
datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.updateContainerChecksumFromMetadataIfNeeded(KeyValueHandler.java:1438) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.markContainerUnhealthy(KeyValueHandler.java:1538) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:413) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.lambda$dispatch$1(HddsDispatcher.java:195) datanode4-1 | at org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:89) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(HddsDispatcher.java:194) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061) datanode4-1 | at org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121) datanode4-1 | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38) datanode4-1 | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78) datanode4-1 | at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572) datanode4-1 | at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) datanode4-1 | at java.base/java.lang.Thread.run(Thread.java:1583) datanode4-1 | 2026-02-02 13:33:49,635 [1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4] INFO impl.HddsDispatcher: Marked Container UNHEALTHY, ContainerID: 1 datanode4-1 | 2026-02-02 
13:33:49,644 [1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4] ERROR ratis.ContainerStateMachine: gid group-BA482D730739 : ApplyTransaction failed. cmd PutBlock logIndex 5 msg : Put Key failed Container Result: IO_EXCEPTION ``` With our recent improvements in this area, it's hard for the Datanode data disk (`hdds.datanode.dir`, where the block files are) to get full - it should only happen if some other process takes up too much space or the cluster is misconfigured. By default RocksDB is on the same disk. 1. If RocksDB is on the same disk as data (which it is by default), disk full should only happen because of a non-Ozone process writing too much. 2. RocksDB can be configured to be on a separate disk. In that case it's possible to fully use the disk. ### Handling We have a class `DatanodeStoreCache` that has a map for db path -> db handle. This serves as a cache and most operations (read/write etc.) go through this cache for getting the db handle. In this class, before returning the db handle, we can check if the db is closed. If it is closed, first try to open it in read-write mode. If that fails with a no-space exception, try opening it in read-only mode. Nice AI-generated flowchart: <img width="595" height="820" alt="image" src="https://github.com/user-attachments/assets/17bd708a-3b0a-40a3-ac96-8ad9bbce8802" /> ### Other points 1. In read-only mode, reads work, writes fail. Write failure will not cause the DB to close. Tested in `testNoSpaceOpenFallsBackToReadOnly`. 2. Some parts of the code may not access the DB through the cache... still looking into this. 3. If a write fails because of exceptions, generally the container is marked `UNHEALTHY` and the pipeline is closed (there's a case-by-case check). ### Next pull requests 1. Add metrics/observability around db closing and opening. Logs are already added to this PR. 2. Handling for moving back from read-only to read-write if possible. 
## What is the link to the Apache JIRA https://issues.apache.org/jira/browse/HDDS-13994 ## How was this patch tested? Added unit tests. Also did manual testing using a docker compose cluster and simulating a full volume. Some grepped logs: ``` $ docker compose logs -f datanode4 | grep -C 10 -Ei 'No space left on device|IOError|Rocks Database is closed|Retrying read-only|read-only mode|due to RocksDB error status' datanode4-1 | 2026-02-18 11:49:28,814 [DiskUsage-/data/metadata- datanode4-1 | ] INFO fs.DUOptimized: Disk metaPath du usages 1903083520, container data usages 0 datanode4-1 | 2026-02-18 11:49:28,815 [DiskUsage-/data/hdds- datanode4-1 | ] INFO fs.DUOptimized: Disk metaPath du usages 4096, container data usages 0 datanode4-1 | 2026-02-18 11:49:48,824 [DiskUsage-/data/ratis/dn.ratis- datanode4-1 | ] INFO fs.DUOptimized: Disk metaPath du usages 16826368, container data usages 0 datanode4-1 | 2026-02-18 11:49:48,824 [DiskUsage-/data/metadata- datanode4-1 | ] INFO fs.DUOptimized: Disk metaPath du usages 1903083520, container data usages 0 datanode4-1 | 2026-02-18 11:49:48,824 [DiskUsage-/data/hdds- datanode4-1 | ] INFO fs.DUOptimized: Disk metaPath du usages 4096, container data usages 0 datanode4-1 | 2026-02-18 11:49:50,838 [a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-4554bed7-e070-4e1c-b806-42b3a389e7f9-4] WARN db.RocksDatabase: Closing RocksDatabase[/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db] **due to RocksDB error status**=**IOError**(NoSpace) state=While appending to file: /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000004.log: **No space left on device** datanode4-1 | org.rocksdb.RocksDBException: While appending to file: /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000004.log: **No space left on device** datanode4-1 | at 
org.rocksdb.RocksDB.write0(Native Method) datanode4-1 | at org.rocksdb.RocksDB.write(RocksDB.java:1708) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:806) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:814) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBBatchOperation.commit(RDBBatchOperation.java:417) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBStore.commitBatchOperation(RDBStore.java:283) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:259) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340) -- datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061) datanode4-1 | at org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121) datanode4-1 | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38) datanode4-1 | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78) datanode4-1 | at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572) datanode4-1 | at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) datanode4-1 | at java.base/java.lang.Thread.run(Thread.java:1583) datanode4-1 | 2026-02-18 11:49:50,899 [a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-e950ca02-71a4-4a0a-9f6d-f017676f7c32-1] WARN db.RDBStore: Failed to flush 
DB before close datanode4-1 | org.apache.hadoop.hdds.utils.db.RocksDatabaseException: **Rocks Database is closed** datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.acquire(RocksDatabase.java:447) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.flush(RocksDatabase.java:490) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBStore.flushDB(RDBStore.java:313) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBStore.close(RDBStore.java:260) datanode4-1 | at org.apache.hadoop.ozone.container.metadata.AbstractRDBStore.stop(AbstractRDBStore.java:122) datanode4-1 | at org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache.getDB(DatanodeStoreCache.java:137) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils.getDB(BlockUtils.java:130) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:185) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680) -- datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061) datanode4-1 | at org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121) datanode4-1 | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38) datanode4-1 | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78) datanode4-1 | at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572) datanode4-1 | at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) datanode4-1 | at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) datanode4-1 | at java.base/java.lang.Thread.run(Thread.java:1583) datanode4-1 | 2026-02-18 11:49:50,951 [a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-e950ca02-71a4-4a0a-9f6d-f017676f7c32-1] WARN utils.DatanodeStoreCache: Failed to open db /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db in read-write mode due to no space. **Retrying read-only**. datanode4-1 | org.apache.hadoop.hdds.utils.db.RocksDatabaseException: Failed to create RDBStore from /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBStore.<init>(RDBStore.java:185) datanode4-1 | at org.apache.hadoop.hdds.utils.db.DBStoreBuilder.build(DBStoreBuilder.java:230) datanode4-1 | at org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore.initDBStore(AbstractDatanodeStore.java:96) datanode4-1 | at org.apache.hadoop.ozone.container.metadata.AbstractRDBStore.<init>(AbstractRDBStore.java:79) datanode4-1 | at org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore.<init>(AbstractDatanodeStore.java:72) datanode4-1 | at org.apache.hadoop.ozone.container.metadata.DatanodeStoreWithIncrementalChunkList.<init>(DatanodeStoreWithIncrementalChunkList.java:53) datanode4-1 | at org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl.<init>(DatanodeStoreSchemaThreeImpl.java:73) datanode4-1 | at org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache.lambda$static$0(DatanodeStoreCache.java:46) datanode4-1 | at org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache.createRawDB(DatanodeStoreCache.java:98) -- datanode4-1 | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061) datanode4-1 | at org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121) datanode4-1 | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38) datanode4-1 | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78) datanode4-1 | at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572) datanode4-1 | at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) datanode4-1 | at java.base/java.lang.Thread.run(Thread.java:1583) datanode4-1 | Caused by: org.apache.hadoop.hdds.utils.db.RocksDatabaseException: **IOError**(NoSpace): class org.apache.hadoop.hdds.utils.db.RocksDatabase: Failed to open /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.toRocksDatabaseException(RocksDatabase.java:112) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.open(RocksDatabase.java:182) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBStore.<init>(RDBStore.java:116) datanode4-1 | ... 
30 more datanode4-1 | Caused by: org.rocksdb.RocksDBException: While appending to file: /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000019.dbtmp: **No space left on device** datanode4-1 | at org.rocksdb.RocksDB.open(Native Method) datanode4-1 | at org.rocksdb.RocksDB.open(RocksDB.java:307) datanode4-1 | at org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB.open(ManagedRocksDB.java:84) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.open(RocksDatabase.java:176) datanode4-1 | ... 31 more datanode4-1 | 2026-02-18 11:49:50,965 [a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-d9b3456d-ea5c-4bc3-bd12-bd4c7941e894-3] WARN keyvalue.KeyValueHandler: Operation: PutBlock , Trace ID: , Message: Put Key failed , Result: IO_EXCEPTION , StorageContainerException Occurred. datanode4-1 | org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: Put Key failed datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:689) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:303) -- datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061) datanode4-1 | at org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121) datanode4-1 | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38) datanode4-1 | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78) datanode4-1 | at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572) datanode4-1 | at 
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) datanode4-1 | at java.base/java.lang.Thread.run(Thread.java:1583) datanode4-1 | Caused by: org.apache.hadoop.hdds.utils.db.RocksDatabaseException: **IOError**(NoSpace): RocksDatabase[/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db]: Failed to batchWrite datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.toRocksDatabaseException(RocksDatabase.java:112) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:809) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:814) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBBatchOperation.commit(RDBBatchOperation.java:417) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBStore.commitBatchOperation(RDBStore.java:283) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:259) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680) datanode4-1 | ... 16 more datanode4-1 | Caused by: org.rocksdb.RocksDBException: While appending to file: /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000004.log: **No space left on device** datanode4-1 | at org.rocksdb.RocksDB.write0(Native Method) datanode4-1 | at org.rocksdb.RocksDB.write(RocksDB.java:1708) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:806) datanode4-1 | ... 
22 more datanode4-1 | 2026-02-18 11:49:50,965 [a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-4554bed7-e070-4e1c-b806-42b3a389e7f9-4] WARN keyvalue.KeyValueHandler: Operation: PutBlock , Trace ID: , Message: Put Key failed , Result: IO_EXCEPTION , StorageContainerException Occurred. datanode4-1 | org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: Put Key failed datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:689) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:303) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:370) -- datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061) datanode4-1 | at org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121) datanode4-1 | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38) datanode4-1 | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78) datanode4-1 | at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572) datanode4-1 | at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) datanode4-1 | at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) datanode4-1 | at java.base/java.lang.Thread.run(Thread.java:1583) datanode4-1 | Caused by: org.apache.hadoop.hdds.utils.db.RocksDatabaseException: **IOError**(NoSpace): 
RocksDatabase[/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db]: Failed to batchWrite datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.toRocksDatabaseException(RocksDatabase.java:112) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:809) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:814) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBBatchOperation.commit(RDBBatchOperation.java:417) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RDBStore.commitBatchOperation(RDBStore.java:283) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:259) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680) datanode4-1 | ... 16 more datanode4-1 | Caused by: org.rocksdb.RocksDBException: While appending to file: /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000004.log: **No space left on device** datanode4-1 | at org.rocksdb.RocksDB.write0(Native Method) datanode4-1 | at org.rocksdb.RocksDB.write(RocksDB.java:1708) datanode4-1 | at org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:806) datanode4-1 | ... 
22 more datanode4-1 | 2026-02-18 11:49:50,969 [a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-e950ca02-71a4-4a0a-9f6d-f017676f7c32-1] WARN utils.DatanodeStoreCache: Opened db /data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db in **read-only mode** after read-write open failed due to no space datanode4-1 | 2026-02-18 11:49:50,972 [a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-cb87e2ea-f964-4c8b-8f25-79b6a5242fb2-2] WARN keyvalue.KeyValueHandler: Operation: PutBlock , Trace ID: , Message: Put Key failed , Result: IO_EXCEPTION , StorageContainerException Occurred. datanode4-1 | org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: Put Key failed datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:689) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340) datanode4-1 | at org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:303) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:370) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.lambda$dispatch$1(HddsDispatcher.java:195) datanode4-1 | at org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:89) datanode4-1 | at org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(HddsDispatcher.java:194) datanode4-1 | at org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
