iit2009060 commented on code in PR #14482: URL: https://github.com/apache/kafka/pull/14482#discussion_r1346090411
########## core/src/test/scala/unit/kafka/log/remote/RemoteIndexCacheTest.scala: ########## @@ -541,6 +541,126 @@ class RemoteIndexCacheTest { verifyFetchIndexInvocation(count = 1) } + @Test + def testOffsetIndexFileAlreadyExistOnDiskButNotInCache(): Unit ={ + val remoteIndexCacheDir = new File(tpDir,RemoteIndexCache.DIR_NAME) + val tempSuffix = ".tmptest" + def getRemoteCacheIndexFileFromDisk(suffix: String) = { + Files.walk(remoteIndexCacheDir.toPath) + .filter(Files.isRegularFile(_)) + .filter(path => path.getFileName.toString.endsWith(suffix)) + .findAny() + } + def renameRemoteCacheIndexFileFromDisk(suffix: String) = { + Files.walk(remoteIndexCacheDir.toPath) + .filter(Files.isRegularFile(_)) + .filter(path => path.getFileName.toString.endsWith(suffix)) + .forEach(f => Files.move(f,f.resolveSibling(f.getFileName().toString().stripSuffix(tempSuffix)))) + } + val entry = cache.getIndexEntry(rlsMetadata) + // copy files with temporary name + Files.copy(entry.offsetIndex().file().toPath(),Paths.get(Utils.replaceSuffix(entry.offsetIndex().file().getPath(),"",tempSuffix))) + Files.copy(entry.txnIndex().file().toPath(),Paths.get(Utils.replaceSuffix(entry.txnIndex().file().getPath(),"",tempSuffix))) + Files.copy(entry.timeIndex().file().toPath(),Paths.get(Utils.replaceSuffix(entry.timeIndex().file().getPath(),"",tempSuffix))) + + cache.internalCache().invalidate(rlsMetadata.remoteLogSegmentId().id()) + + // wait until entry is marked for deletion + TestUtils.waitUntilTrue(() => entry.isMarkedForCleanup, + "Failed to mark cache entry for cleanup after invalidation") + TestUtils.waitUntilTrue(() => entry.isCleanStarted, + "Failed to cleanup cache entry after invalidation") + + // restore index files + renameRemoteCacheIndexFileFromDisk(tempSuffix) + // validate cache entry for the above key should be null + assertNull(cache.internalCache().getIfPresent(rlsMetadata.remoteLogSegmentId().id())) + cache.getIndexEntry(rlsMetadata) + // Index Files already exist + // rsm 
should not be called again + // instead files exist on disk + // should be used + verifyFetchIndexInvocation(count = 1) + // verify index files on disk + assertTrue(getRemoteCacheIndexFileFromDisk(LogFileUtils.INDEX_FILE_SUFFIX).isPresent, s"Offset index file should be present on disk at ${remoteIndexCacheDir.toPath}") + assertTrue(getRemoteCacheIndexFileFromDisk(LogFileUtils.TXN_INDEX_FILE_SUFFIX).isPresent, s"Txn index file should be present on disk at ${remoteIndexCacheDir.toPath}") + assertTrue(getRemoteCacheIndexFileFromDisk(LogFileUtils.TIME_INDEX_FILE_SUFFIX).isPresent, s"Time index file should be present on disk at ${remoteIndexCacheDir.toPath}") + } + + @Test + def testRSMReturnCorruptedIndexFile(): Unit ={ + + when(rsm.fetchIndex(any(classOf[RemoteLogSegmentMetadata]), any(classOf[IndexType]))) + .thenAnswer(ans => { + val metadata = ans.getArgument[RemoteLogSegmentMetadata](0) + val indexType = ans.getArgument[IndexType](1) + val pw = new PrintWriter(remoteOffsetIndexFile(tpDir, metadata)) + pw.write("Hello, world") + // The size of the string written in the file is 12 bytes, + // but it should be multiple of Offset Index EntrySIZE which is equal to 8. + pw.close() + val offsetIdx = createOffsetIndexForSegmentMetadata(metadata) Review Comment: @divijvaidya We can keep this function, i.e. createOffsetIndexForSegmentMetadata, to be used for indexes received from remote storage rather than from the remote index cache. The existing test cases work because they put values directly into internalCache (Caffeine) rather than going through the getIndexEntry route. In the above test case I want to corrupt the index files received from remote storage rather than those from the remote index cache. getIndexEntry does the following steps: 1. Fetch indexes from remote storage (i.e. the indexes stored in the tpDir directory, as per the mock). 2. Manually corrupt the index files stored in the tpDir directory after the fetch from remote storage. 3.
It tries to copy the index files from the tpDir directory to the remote index cache directory. 4. It runs a sanity check on the indexes stored in the tpDir/remoteindexcache directory. 5. It throws CorruptedIndexException. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org