[ https://issues.apache.org/jira/browse/ASTERIXDB-2487?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Taewoo Kim updated ASTERIXDB-2487: ---------------------------------- Description: The Cloudberry cluster became UNUSABLE after nc-1 (one of five NCs) generated the following exception. {code:java} 21:32:10.659 [Executor-10173:1] WARN org.apache.asterix.app.nc.IndexCheckpointManager - Couldn't find any checkpoint file for index io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382. Content of dir are null. 21:32:10.659 [Executor-10172:1] WARN org.apache.asterix.app.nc.IndexCheckpointManager - Couldn't find any checkpoint file for index io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c. Content of dir are null. 21:32:10.660 [Executor-10173:1] ERROR org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness - FLUSH operation.afterFinalize failed on {"class" : "LSMBTree", "dir" : "/home/waans11/asterixdb/io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382", "memory" : [{"class":"LSMBTreeMemoryComponent", "state":"READABLE_UNWRITABLE_FLUSHING", "writers":0, "readers":1, "pendingFlushes":0, "id":"[9,9]"}, {"class":"LSMBTreeMemoryComponent", "state":"INACTIVE", "writers":0, "readers":0, "pendingFlushes":0, "id":"[8,8]"}], "disk" : 3, "num-scheduled-flushes":1, "current-memory-component":1} 21:32:10.660 [Executor-10173:1] ERROR org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness - FLUSH operation.afterFinalize failed on {"class" : "LSMBTree", "dir" : "/home/waans11/asterixdb/io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382", "memory" : [{"class":"LSMBTreeMemoryComponent", "state":"READABLE_UNWRITABLE_FLUSHING", "writers":0, "readers":1, "pendingFlushes":0, "id":"[9,9]"}, {"class":"LSMBTreeMemoryComponent", "state":"INACTIVE", "writers":0, "readers":0, "pendingFlushes":0, "id":"[8,8]"}], "disk" : 3, 
"num-scheduled-flushes":1, "current-memory-component":1} java.lang.IllegalStateException: Couldn't find any checkpoints for resource: io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382 at org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_161] at java.lang.Thread.run(Thread.java:748) 
[?:1.8.0_161] 21:32:10.663 [Executor-10172:1] ERROR org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness - FLUSH operation.afterFinalize failed on {"class" : "LSMBTree", "dir" : "/home/waans11/asterixdb/io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c", "memory" : [{"class":"LSMBTreeMemoryComponent", "state":"READABLE_UNWRITABLE_FLUSHING", "writers":0, "readers":1, "pendingFlushes":0, "id":"[24,24]"}, {"class":"LSMBTreeMemoryComponent", "state":"INACTIVE", "writers":0, "readers":0, "pendingFlushes":0, "id":"[23,23]"}], "disk" : 4, "num-scheduled-flushes":1, "current-memory-component":1} java.lang.IllegalStateException: Couldn't find any checkpoints for resource: io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c at org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at 
org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_161] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_161] 21:32:10.690 [Executor-10172:1] ERROR org.apache.asterix.app.nc.HaltCallback - Operation org.apache.hyracks.storage.am.lsm.btree.impls.LSMBTreeFlushOperation@b305d582 has failed java.lang.IllegalStateException: Couldn't find any checkpoints for resource: io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c at org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at 
org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_161] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_161]{code} Add: Based on [~mhubail]'s advice, I checked the directory and found that the files exist there. {code:java} [waans11@americium ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382]$ ll -al total 43684 drwxr-xr-x 2 waans11 waans11 151 Nov 26 21:32 . drwxr-xr-x 3 waans11 waans11 55 Nov 24 22:36 .. 
-rw-r--r-- 1 waans11 waans11 43387480 Nov 26 08:22 0_5_b -rw-r--r-- 1 waans11 waans11 262160 Nov 26 08:22 0_5_f -rw-r--r-- 1 waans11 waans11 262160 Nov 26 14:28 6_6_b -rw-r--r-- 1 waans11 waans11 262160 Nov 26 14:28 6_6_f -rw-r--r-- 1 waans11 waans11 262160 Nov 26 19:40 7_7_b -rw-r--r-- 1 waans11 waans11 262160 Nov 26 19:40 7_7_f -rw-r--r-- 1 waans11 waans11 107 Nov 26 14:28 .idx_checkpoint_7 -rw-r--r-- 1 waans11 waans11 107 Nov 26 19:40 .idx_checkpoint_8 -rw-r--r-- 1 waans11 waans11 2953 Nov 24 22:36 .metadata{code} was: The Clouberry cluster became UNUSUABLE after the nc-1 (among five NCs) generated the following exception. {code:java} 21:32:10.660 [Executor-10173:1] ERROR org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness - FLUSH operation.afterFinalize failed on {"class" : "LSMBTree", "dir" : "/home/waans11/asterixdb/io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382", "memory" : [{"class":"LSMBTreeMemoryComponent", "state":"READABLE_UNWRITABLE_FLUSHING", "writers":0, "readers":1, "pendingFlushes":0, "id":"[9,9]"}, {"class":"LSMBTreeMemoryComponent", "state":"INACTIVE", "writers":0, "readers":0, "pendingFlushes":0, "id":"[8,8]"}], "disk" : 3, "num-scheduled-flushes":1, "current-memory-component":1} java.lang.IllegalStateException: Couldn't find any checkpoints for resource: io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382 at org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at 
org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_161] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_161] 21:32:10.663 [Executor-10172:1] ERROR org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness - FLUSH operation.afterFinalize failed on {"class" : "LSMBTree", "dir" : "/home/waans11/asterixdb/io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c", "memory" : [{"class":"LSMBTreeMemoryComponent", "state":"READABLE_UNWRITABLE_FLUSHING", "writers":0, "readers":1, "pendingFlushes":0, "id":"[24,24]"}, {"class":"LSMBTreeMemoryComponent", "state":"INACTIVE", "writers":0, "readers":0, "pendingFlushes":0, "id":"[23,23]"}], "disk" : 4, "num-scheduled-flushes":1, "current-memory-component":1} java.lang.IllegalStateException: Couldn't find any 
checkpoints for resource: io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c at org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_161] at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_161] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_161] 21:32:10.690 [Executor-10172:1] ERROR org.apache.asterix.app.nc.HaltCallback - Operation org.apache.hyracks.storage.am.lsm.btree.impls.LSMBTreeFlushOperation@b305d582 has failed java.lang.IllegalStateException: Couldn't find any checkpoints for resource: io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c at org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) 
~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_161] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_161] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_161]{code} > Cluster becomes UNUSABLE with "java.lang.IllegalStateException: Couldn't > find any checkpoints for resource" > ------------------------------------------------------------------------------------------------------------ > > Key: ASTERIXDB-2487 > URL: https://issues.apache.org/jira/browse/ASTERIXDB-2487 > Project: Apache AsterixDB > Issue Type: Bug > Reporter: Taewoo Kim > Priority: Major > Attachments: nc-1.log > > > The Cloudberry cluster became UNUSABLE after nc-1 (one of five NCs) > generated the following exception. > > {code:java} > 21:32:10.659 [Executor-10173:1] WARN > org.apache.asterix.app.nc.IndexCheckpointManager - Couldn't find any > checkpoint file for index > io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382. > Content of dir are null. > 21:32:10.659 [Executor-10172:1] WARN > org.apache.asterix.app.nc.IndexCheckpointManager - Couldn't find any > checkpoint file for index > io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c. > Content of dir are null. 
> 21:32:10.660 [Executor-10173:1] ERROR > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness - FLUSH > operation.afterFinalize failed on {"class" : "LSMBTree", "dir" : > "/home/waans11/asterixdb/io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382", > "memory" : [{"class":"LSMBTreeMemoryComponent", > "state":"READABLE_UNWRITABLE_FLUSHING", "writers":0, "readers":1, > "pendingFlushes":0, "id":"[9,9]"}, {"class":"LSMBTreeMemoryComponent", > "state":"INACTIVE", "writers":0, "readers":0, "pendingFlushes":0, > "id":"[8,8]"}], "disk" : 3, "num-scheduled-flushes":1, > "current-memory-component":1} > 21:32:10.660 [Executor-10173:1] ERROR > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness - FLUSH > operation.afterFinalize failed on {"class" : "LSMBTree", "dir" : > "/home/waans11/asterixdb/io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382", > "memory" : [{"class":"LSMBTreeMemoryComponent", > "state":"READABLE_UNWRITABLE_FLUSHING", "writers":0, "readers":1, > "pendingFlushes":0, "id":"[9,9]"}, {"class":"LSMBTreeMemoryComponent", > "state":"INACTIVE", "writers":0, "readers":0, "pendingFlushes":0, > "id":"[8,8]"}], "disk" : 3, "num-scheduled-flushes":1, > "current-memory-component":1} > java.lang.IllegalStateException: Couldn't find any checkpoints for resource: > io1/storage/partition_0/twitter/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382/0/ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382 > at > org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) > ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) > ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) > 
~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) > ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_161] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [?:1.8.0_161] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [?:1.8.0_161] > at java.lang.Thread.run(Thread.java:748) [?:1.8.0_161] > 21:32:10.663 [Executor-10172:1] ERROR > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness - FLUSH > operation.afterFinalize failed on {"class" : "LSMBTree", "dir" : > "/home/waans11/asterixdb/io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c", > "memory" : [{"class":"LSMBTreeMemoryComponent", > "state":"READABLE_UNWRITABLE_FLUSHING", "writers":0, "readers":1, > "pendingFlushes":0, "id":"[24,24]"}, {"class":"LSMBTreeMemoryComponent", > "state":"INACTIVE", "writers":0, "readers":0, "pendingFlushes":0, > "id":"[23,23]"}], 
"disk" : 4, "num-scheduled-flushes":1, > "current-memory-component":1} > java.lang.IllegalStateException: Couldn't find any checkpoints for resource: > io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c > at > org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) > ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) > ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) > ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) > ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) > ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) > ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > 
org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) > [hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_161] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [?:1.8.0_161] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [?:1.8.0_161] > at java.lang.Thread.run(Thread.java:748) [?:1.8.0_161] > 21:32:10.690 [Executor-10172:1] ERROR org.apache.asterix.app.nc.HaltCallback > - Operation > org.apache.hyracks.storage.am.lsm.btree.impls.LSMBTreeFlushOperation@b305d582 > has failed > java.lang.IllegalStateException: Couldn't find any checkpoints for resource: > io2/storage/partition_1/twitter/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c/0/ds_tweet_9460370bb0ca1c98a779b1bcc6861c2c > at > org.apache.asterix.app.nc.IndexCheckpointManager.getLatest(IndexCheckpointManager.java:145) > ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.app.nc.IndexCheckpointManager.flushed(IndexCheckpointManager.java:86) > ~[asterix-app-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.addComponentToCheckpoint(LSMIOOperationCallback.java:136) > ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback.afterFinalize(LSMIOOperationCallback.java:123) > ~[asterix-common-0.9.5-SNAPSHOT.jar:0.9.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.doIo(LSMHarness.java:544) > ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:513) > ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:122) > 
~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:38) > ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.common.impls.FlushOperation.call(FlushOperation.java:29) > ~[hyracks-storage-am-lsm-common-0.3.5-SNAPSHOT.jar:0.3.5-SNAPSHOT] > at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_161] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [?:1.8.0_161] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [?:1.8.0_161] > at java.lang.Thread.run(Thread.java:748) [?:1.8.0_161]{code} > Add: Based on [~mhubail]'s advice, I checked the directory and found that the > files exist there. > > {code:java} > [waans11@americium ds_tweet_e9ad9c2394f7dc7b6a69fb43e52a7382]$ ll -al > total 43684 > drwxr-xr-x 2 waans11 waans11 151 Nov 26 21:32 . > drwxr-xr-x 3 waans11 waans11 55 Nov 24 22:36 .. > -rw-r--r-- 1 waans11 waans11 43387480 Nov 26 08:22 0_5_b > -rw-r--r-- 1 waans11 waans11 262160 Nov 26 08:22 0_5_f > -rw-r--r-- 1 waans11 waans11 262160 Nov 26 14:28 6_6_b > -rw-r--r-- 1 waans11 waans11 262160 Nov 26 14:28 6_6_f > -rw-r--r-- 1 waans11 waans11 262160 Nov 26 19:40 7_7_b > -rw-r--r-- 1 waans11 waans11 262160 Nov 26 19:40 7_7_f > -rw-r--r-- 1 waans11 waans11 107 Nov 26 14:28 .idx_checkpoint_7 > -rw-r--r-- 1 waans11 waans11 107 Nov 26 19:40 .idx_checkpoint_8 > -rw-r--r-- 1 waans11 waans11 2953 Nov 24 22:36 .metadata{code} > > -- This message was sent by Atlassian JIRA (v7.6.3#76005)