siddhantsangwan opened a new pull request, #9790:
URL: https://github.com/apache/ozone/pull/9790

   ## What changes were proposed in this pull request?
   In the Datanode, Schema V3 uses one RocksDB per volume. If the disk this DB lives on gets full, RocksDB operations fail with an exception and Ozone handles this by closing the DB. As a result, not only writes but also reads fail in this situation.
   
   For example, I simulated a full disk in Ozone and captured the DB behaviour (grep for `IOError(NoSpace)` and `No space left on device`):
   
   ```
   datanode4-1  | 2026-02-02 13:33:49,623 
[1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4]
 WARN keyvalue.KeyValueHandler: Operation: PutBlock , Trace ID:  , Message: Put 
Key failed , Result: IO_EXCEPTION , StorageContainerException Occurred.
   
   datanode4-1  | 
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: 
Put Key failed
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:689)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:303)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:370)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.lambda$dispatch$1(HddsDispatcher.java:195)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:89)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(HddsDispatcher.java:194)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061)
   
   datanode4-1  | at 
org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78)
   
   datanode4-1  | at 
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
   
   datanode4-1  | at 
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
   
   datanode4-1  | at java.base/java.lang.Thread.run(Thread.java:1583)
   
   datanode4-1  | Caused by: 
org.apache.hadoop.hdds.utils.db.RocksDatabaseException: IOError(NoSpace): 
RocksDatabase[/data/metadata/db/CID-5544c724-d702-4c2b-b8c5-948d6c36bde0/DS-58c1b346-6d46-4b1f-a641-2a149f3a6292/container.db]:
 Failed to batchWrite
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.toRocksDatabaseException(RocksDatabase.java:112)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:794)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:799)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBBatchOperation.commit(RDBBatchOperation.java:417)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBStore.commitBatchOperation(RDBStore.java:283)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:259)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680)
   
   datanode4-1  | ... 16 more
   
   datanode4-1  | Caused by: org.rocksdb.RocksDBException: While appending to 
file: 
/data/metadata/db/CID-5544c724-d702-4c2b-b8c5-948d6c36bde0/DS-58c1b346-6d46-4b1f-a641-2a149f3a6292/container.db/000004.log:
 No space left on device
   
   datanode4-1  | at org.rocksdb.RocksDB.write0(Native Method)
   
   datanode4-1  | at org.rocksdb.RocksDB.write(RocksDB.java:1708)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:791)
   
   datanode4-1  | ... 22 more
   
   datanode4-1  | 2026-02-02 13:33:49,630 
[1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4]
 WARN keyvalue.KeyValueContainer: Marked container UNHEALTHY from OPEN: 
KeyValueContainerData #1 (UNHEALTHY, non-empty, ri=0, 
origin=[dn_1003db69-5a14-4a16-b901-5dffd2d17036, 
pipeline_d770c349-2c11-42b6-8c02-ba482d730739])
   
   datanode4-1  | 2026-02-02 13:33:49,632 
[1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4]
 ERROR keyvalue.KeyValueHandler: Cannot create container checksum for container 
1 , Exception: 
   
   datanode4-1  | org.apache.hadoop.hdds.utils.db.RocksDatabaseException: Rocks 
Database is closed
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.acquire(RocksDatabase.java:432)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.newIterator(RocksDatabase.java:780)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBTable.iterator(RDBTable.java:223)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.TypedTable.newCodecBufferTableIterator(TypedTable.java:508)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.TypedTable.iterator(TypedTable.java:404)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.Table.iterator(Table.java:152)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl.getBlockIterator(DatanodeStoreSchemaThreeImpl.java:91)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.updateAndGetContainerChecksumFromMetadata(KeyValueHandler.java:1456)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.updateContainerChecksumFromMetadataIfNeeded(KeyValueHandler.java:1438)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.markContainerUnhealthy(KeyValueHandler.java:1538)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:413)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.lambda$dispatch$1(HddsDispatcher.java:195)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:89)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(HddsDispatcher.java:194)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061)
   
   datanode4-1  | at 
org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78)
   
   datanode4-1  | at 
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
   
   datanode4-1  | at 
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
   
   datanode4-1  | at java.base/java.lang.Thread.run(Thread.java:1583)
   
   datanode4-1  | 2026-02-02 13:33:49,635 
[1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4]
 INFO impl.HddsDispatcher: Marked Container UNHEALTHY, ContainerID: 1
   
   datanode4-1  | 2026-02-02 13:33:49,644 
[1003db69-5a14-4a16-b901-5dffd2d17036-ContainerOp-d770c349-2c11-42b6-8c02-ba482d730739-4]
 ERROR ratis.ContainerStateMachine: gid group-BA482D730739 : ApplyTransaction 
failed. cmd PutBlock logIndex 5 msg : Put Key failed Container Result: 
IO_EXCEPTION
   ```
   
   With our recent improvements in this area, it's hard for the Datanode data disk (`hdds.datanode.dir`, where the block files are) to get full - it should only happen if some other process takes up too much space or the cluster is misconfigured. By default RocksDB is on the same disk, so:
   
   1. If RocksDB is on the same disk as the data (the default), the disk should only fill up because a non-Ozone process writes too much.
   2. If RocksDB is configured to be on a separate disk, it is possible for that disk to fill up completely.
   
   ### Handling
   We have a class `DatanodeStoreCache` that maps a DB path to its DB handle. It serves as a cache, and most operations (reads, writes, etc.) go through it to get the handle. In this class, before returning the handle, we can check whether the DB is closed. If it is closed, first try to reopen it in read-write mode. If that fails with a no-space exception, fall back to opening it in read-only mode. A rough sketch of this flow follows the flowchart below.
   
   Nice AI-generated flowchart:
   <img width="595" height="820" alt="image" src="https://github.com/user-attachments/assets/17bd708a-3b0a-40a3-ac96-8ad9bbce8802" />
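   
   For reference, here is a rough standalone sketch of that fallback written directly against RocksJava. It is a minimal sketch only: the class, method, and field names below are illustrative and do not match the actual `DatanodeStoreCache` changes in this PR, and the closed-handle check is simplified.
   
   ```java
   import java.util.Map;
   import java.util.concurrent.ConcurrentHashMap;
   
   import org.rocksdb.Options;
   import org.rocksdb.RocksDB;
   import org.rocksdb.RocksDBException;
   import org.rocksdb.Status;
   
   /** Illustrative sketch only; not the actual DatanodeStoreCache change. */
   public final class ReopenOnNoSpaceCache {
   
     /** Cached handle plus whether it was opened read-only. */
     private static final class Entry {
       final RocksDB db;
       final boolean readOnly;
   
       Entry(RocksDB db, boolean readOnly) {
         this.db = db;
         this.readOnly = readOnly;
       }
     }
   
     // Kept open for the lifetime of the cache in this sketch.
     private final Options options = new Options().setCreateIfMissing(false);
     private final Map<String, Entry> cache = new ConcurrentHashMap<>();
   
     /**
      * Returns an open handle for dbPath. If the cached handle was closed
      * (e.g. after an IOError(NoSpace) batch write), retry a read-write open
      * first; if that also fails with NoSpace, fall back to read-only so
      * reads keep working. Synchronized to keep the reopen race out of the
      * sketch.
      */
     public synchronized RocksDB getDb(String dbPath) throws RocksDBException {
       Entry cached = cache.get(dbPath);
       if (cached != null && cached.db.isOwningHandle()) {
         return cached.db;                            // still open, reuse it
       }
       try {
         RocksDB db = RocksDB.open(options, dbPath);  // normal read-write open
         cache.put(dbPath, new Entry(db, false));
         return db;
       } catch (RocksDBException e) {
         if (!isNoSpace(e)) {
           throw e;                                   // unrelated failure
         }
         // The disk is full: even a read-write open has to write new
         // MANIFEST/WAL files, so retry read-only to keep reads available.
         RocksDB db = RocksDB.openReadOnly(options, dbPath);
         cache.put(dbPath, new Entry(db, true));
         return db;
       }
     }
   
     private static boolean isNoSpace(RocksDBException e) {
       Status status = e.getStatus();
       return status != null
           && status.getCode() == Status.Code.IOError
           && status.getSubCode() == Status.SubCode.NoSpace;
     }
   }
   ```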
   
   ### Other points
   1. In read-only mode, reads work and writes fail. A failed write will not cause the DB to close. Tested in `testNoSpaceOpenFallsBackToReadOnly`; see the standalone sketch after this list.
   2. Some parts of the code may not access the DB through the cache - still looking into this.
   3. If a write fails with an exception, the container is generally marked `UNHEALTHY` and the pipeline is closed (there is a case-by-case check).
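   
   Point 1 can also be sanity-checked outside Ozone with plain RocksJava. Below is a quick standalone illustration (not the PR's `testNoSpaceOpenFallsBackToReadOnly` test) of the read-only semantics the fallback relies on: gets succeed, puts throw, and a rejected put does not invalidate the handle.
   
   ```java
   import java.nio.charset.StandardCharsets;
   
   import org.rocksdb.Options;
   import org.rocksdb.RocksDB;
   import org.rocksdb.RocksDBException;
   
   /** Standalone illustration of read-only semantics; not the PR's unit test. */
   public final class ReadOnlyDemo {
     public static void main(String[] args) throws RocksDBException {
       RocksDB.loadLibrary();
       String path = args[0];  // path to an existing RocksDB directory, e.g. a copy of a container.db
       try (Options options = new Options();
            RocksDB db = RocksDB.openReadOnly(options, path)) {
         byte[] key = "someKey".getBytes(StandardCharsets.UTF_8);
   
         // Reads work in read-only mode.
         byte[] value = db.get(key);
         System.out.println("get -> " + (value == null ? "miss" : value.length + " bytes"));
   
         // Writes are rejected with a RocksDBException, but the rejection does
         // not close or invalidate the handle.
         try {
           db.put(key, new byte[] {1});
         } catch (RocksDBException e) {
           System.out.println("put rejected as expected: " + e.getMessage());
         }
   
         // The same handle keeps serving reads after the rejected write.
         db.get(key);
       }
     }
   }
   ```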
   
   ### Next pull requests
   1. Add metrics/observability around DB closing and opening. Logs are already added in this PR.
   2. Handle moving back from read-only to read-write mode, if possible.
   
   ## What is the link to the Apache JIRA
   
   https://issues.apache.org/jira/browse/HDDS-13994
   
   ## How was this patch tested?
   
   Added unit tests.
   
   Also did manual testing using a docker compose cluster and simulating a full 
volume. Some grepped logs:
   
   ```
   $ docker compose logs -f datanode4 | grep -C 10 -Ei 'No space left on 
device|IOError|Rocks Database is closed|Retrying read-only|read-only mode|due 
to RocksDB error status'
   
   datanode4-1  | 2026-02-18 11:49:28,814 [DiskUsage-/data/metadata-
   
   datanode4-1  | ] INFO fs.DUOptimized: Disk metaPath du usages 1903083520, 
container data usages 0
   
   datanode4-1  | 2026-02-18 11:49:28,815 [DiskUsage-/data/hdds-
   
   datanode4-1  | ] INFO fs.DUOptimized: Disk metaPath du usages 4096, 
container data usages 0
   
   datanode4-1  | 2026-02-18 11:49:48,824 [DiskUsage-/data/ratis/dn.ratis-
   
   datanode4-1  | ] INFO fs.DUOptimized: Disk metaPath du usages 16826368, 
container data usages 0
   
   datanode4-1  | 2026-02-18 11:49:48,824 [DiskUsage-/data/metadata-
   
   datanode4-1  | ] INFO fs.DUOptimized: Disk metaPath du usages 1903083520, 
container data usages 0
   
   datanode4-1  | 2026-02-18 11:49:48,824 [DiskUsage-/data/hdds-
   
   datanode4-1  | ] INFO fs.DUOptimized: Disk metaPath du usages 4096, 
container data usages 0
   
   datanode4-1  | 2026-02-18 11:49:50,838 
[a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-4554bed7-e070-4e1c-b806-42b3a389e7f9-4]
 WARN db.RocksDatabase: Closing 
RocksDatabase[/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db]
 **due to RocksDB error status**=**IOError**(NoSpace) state=While appending to 
file: 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000004.log:
 **No space left on device**
   
   datanode4-1  | org.rocksdb.RocksDBException: While appending to file: 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000004.log:
 **No space left on device**
   
   datanode4-1  | at org.rocksdb.RocksDB.write0(Native Method)
   
   datanode4-1  | at org.rocksdb.RocksDB.write(RocksDB.java:1708)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:806)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:814)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBBatchOperation.commit(RDBBatchOperation.java:417)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBStore.commitBatchOperation(RDBStore.java:283)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:259)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340)
   
   --
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061)
   
   datanode4-1  | at 
org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78)
   
   datanode4-1  | at 
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
   
   datanode4-1  | at 
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
   
   datanode4-1  | at java.base/java.lang.Thread.run(Thread.java:1583)
   
   datanode4-1  | 2026-02-18 11:49:50,899 
[a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-e950ca02-71a4-4a0a-9f6d-f017676f7c32-1]
 WARN db.RDBStore: Failed to flush DB before close
   
   datanode4-1  | org.apache.hadoop.hdds.utils.db.RocksDatabaseException: 
**Rocks Database is closed**
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.acquire(RocksDatabase.java:447)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.flush(RocksDatabase.java:490)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBStore.flushDB(RDBStore.java:313)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBStore.close(RDBStore.java:260)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.metadata.AbstractRDBStore.stop(AbstractRDBStore.java:122)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache.getDB(DatanodeStoreCache.java:137)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils.getDB(BlockUtils.java:130)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:185)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680)
   
   --
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061)
   
   datanode4-1  | at 
org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78)
   
   datanode4-1  | at 
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
   
   datanode4-1  | at 
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
   
   datanode4-1  | at java.base/java.lang.Thread.run(Thread.java:1583)
   
   datanode4-1  | 2026-02-18 11:49:50,951 
[a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-e950ca02-71a4-4a0a-9f6d-f017676f7c32-1]
 WARN utils.DatanodeStoreCache: Failed to open db 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db
 in read-write mode due to no space. **Retrying read-only**.
   
   datanode4-1  | org.apache.hadoop.hdds.utils.db.RocksDatabaseException: 
Failed to create RDBStore from 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBStore.<init>(RDBStore.java:185)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.DBStoreBuilder.build(DBStoreBuilder.java:230)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore.initDBStore(AbstractDatanodeStore.java:96)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.metadata.AbstractRDBStore.<init>(AbstractRDBStore.java:79)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore.<init>(AbstractDatanodeStore.java:72)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.metadata.DatanodeStoreWithIncrementalChunkList.<init>(DatanodeStoreWithIncrementalChunkList.java:53)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl.<init>(DatanodeStoreSchemaThreeImpl.java:73)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache.lambda$static$0(DatanodeStoreCache.java:46)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache.createRawDB(DatanodeStoreCache.java:98)
   
   --
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061)
   
   datanode4-1  | at 
org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78)
   
   datanode4-1  | at 
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
   
   datanode4-1  | at 
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
   
   datanode4-1  | at java.base/java.lang.Thread.run(Thread.java:1583)
   
   datanode4-1  | Caused by: 
org.apache.hadoop.hdds.utils.db.RocksDatabaseException: **IOError**(NoSpace): 
class org.apache.hadoop.hdds.utils.db.RocksDatabase: Failed to open 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.toRocksDatabaseException(RocksDatabase.java:112)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.open(RocksDatabase.java:182)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBStore.<init>(RDBStore.java:116)
   
   datanode4-1  | ... 30 more
   
   datanode4-1  | Caused by: org.rocksdb.RocksDBException: While appending to 
file: 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000019.dbtmp:
 **No space left on device**
   
   datanode4-1  | at org.rocksdb.RocksDB.open(Native Method)
   
   datanode4-1  | at org.rocksdb.RocksDB.open(RocksDB.java:307)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB.open(ManagedRocksDB.java:84)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.open(RocksDatabase.java:176)
   
   datanode4-1  | ... 31 more
   
   datanode4-1  | 2026-02-18 11:49:50,965 
[a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-d9b3456d-ea5c-4bc3-bd12-bd4c7941e894-3]
 WARN keyvalue.KeyValueHandler: Operation: PutBlock , Trace ID:  , Message: Put 
Key failed , Result: IO_EXCEPTION , StorageContainerException Occurred.
   
   datanode4-1  | 
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: 
Put Key failed
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:689)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:303)
   
   --
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061)
   
   datanode4-1  | at 
org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78)
   
   datanode4-1  | at 
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
   
   datanode4-1  | at 
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
   
   datanode4-1  | at java.base/java.lang.Thread.run(Thread.java:1583)
   
   datanode4-1  | Caused by: 
org.apache.hadoop.hdds.utils.db.RocksDatabaseException: **IOError**(NoSpace): 
RocksDatabase[/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db]:
 Failed to batchWrite
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.toRocksDatabaseException(RocksDatabase.java:112)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:809)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:814)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBBatchOperation.commit(RDBBatchOperation.java:417)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBStore.commitBatchOperation(RDBStore.java:283)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:259)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680)
   
   datanode4-1  | ... 16 more
   
   datanode4-1  | Caused by: org.rocksdb.RocksDBException: While appending to 
file: 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000004.log:
 **No space left on device**
   
   datanode4-1  | at org.rocksdb.RocksDB.write0(Native Method)
   
   datanode4-1  | at org.rocksdb.RocksDB.write(RocksDB.java:1708)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:806)
   
   datanode4-1  | ... 22 more
   
   datanode4-1  | 2026-02-18 11:49:50,965 
[a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-4554bed7-e070-4e1c-b806-42b3a389e7f9-4]
 WARN keyvalue.KeyValueHandler: Operation: PutBlock , Trace ID:  , Message: Put 
Key failed , Result: IO_EXCEPTION , StorageContainerException Occurred.
   
   datanode4-1  | 
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: 
Put Key failed
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:689)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:303)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:370)
   
   --
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.lambda$applyTransaction$9(ContainerStateMachine.java:1061)
   
   datanode4-1  | at 
org.apache.ratis.util.TaskQueue.lambda$submit$0(TaskQueue.java:121)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils.runAndLog(LogUtils.java:38)
   
   datanode4-1  | at org.apache.ratis.util.LogUtils$1.run(LogUtils.java:78)
   
   datanode4-1  | at 
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
   
   datanode4-1  | at 
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
   
   datanode4-1  | at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
   
   datanode4-1  | at java.base/java.lang.Thread.run(Thread.java:1583)
   
   datanode4-1  | Caused by: 
org.apache.hadoop.hdds.utils.db.RocksDatabaseException: **IOError**(NoSpace): 
RocksDatabase[/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db]:
 Failed to batchWrite
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.toRocksDatabaseException(RocksDatabase.java:112)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:809)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:814)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBBatchOperation.commit(RDBBatchOperation.java:417)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RDBStore.commitBatchOperation(RDBStore.java:283)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.persistPutBlock(BlockManagerImpl.java:259)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.putBlock(BlockManagerImpl.java:97)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:680)
   
   datanode4-1  | ... 16 more
   
   datanode4-1  | Caused by: org.rocksdb.RocksDBException: While appending to 
file: 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db/000004.log:
 **No space left on device**
   
   datanode4-1  | at org.rocksdb.RocksDB.write0(Native Method)
   
   datanode4-1  | at org.rocksdb.RocksDB.write(RocksDB.java:1708)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.utils.db.RocksDatabase.batchWrite(RocksDatabase.java:806)
   
   datanode4-1  | ... 22 more
   
   datanode4-1  | 2026-02-18 11:49:50,969 
[a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-e950ca02-71a4-4a0a-9f6d-f017676f7c32-1]
 WARN utils.DatanodeStoreCache: Opened db 
/data/metadata/db/CID-98fb688c-7973-47c8-93a8-9e16d29f1fad/DS-547dfa19-b2ee-4b0b-9304-f279c4748f4c/container.db
 in **read-only mode** after read-write open failed due to no space
   
   datanode4-1  | 2026-02-18 11:49:50,972 
[a2299bc6-3cc8-4d2b-b0f8-df0ecd2c6f55-ContainerOp-cb87e2ea-f964-4c8b-8f25-79b6a5242fb2-2]
 WARN keyvalue.KeyValueHandler: Operation: PutBlock , Trace ID:  , Message: Put 
Key failed , Result: IO_EXCEPTION , StorageContainerException Occurred.
   
   datanode4-1  | 
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: 
Put Key failed
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handlePutBlock(KeyValueHandler.java:689)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:340)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:303)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:370)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.lambda$dispatch$1(HddsDispatcher.java:195)
   
   datanode4-1  | at 
org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:89)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(HddsDispatcher.java:194)
   
   datanode4-1  | at 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine.dispatchCommand(ContainerStateMachine.java:567)
   ```

