showuon commented on code in PR #14482:
URL: https://github.com/apache/kafka/pull/14482#discussion_r1346739560


##########
core/src/test/scala/unit/kafka/log/remote/RemoteIndexCacheTest.scala:
##########
@@ -541,6 +542,131 @@ class RemoteIndexCacheTest {
     verifyFetchIndexInvocation(count = 1)
   }
 
+  @Test
+  def testIndexFileAlreadyExistOnDiskButNotInCache(): Unit = {

Review Comment:
   Will this situation happen in the current logic? If so, it should be a bug, 
right? @divijvaidya, WDYT?



##########
core/src/test/scala/unit/kafka/log/remote/RemoteIndexCacheTest.scala:
##########
@@ -541,6 +542,131 @@ class RemoteIndexCacheTest {
     verifyFetchIndexInvocation(count = 1)
   }
 
+  @Test
+  def testIndexFileAlreadyExistOnDiskButNotInCache(): Unit = {
+    val remoteIndexCacheDir = cache.cacheDir()
+    val tempSuffix = ".tmptest"
+
+    def getRemoteCacheIndexFileFromDisk(suffix: String) = {
+      Files.walk(remoteIndexCacheDir.toPath)
+        .filter(Files.isRegularFile(_))
+        .filter(path => path.getFileName.toString.endsWith(suffix))
+        .findAny()
+    }
+
+    def renameRemoteCacheIndexFileFromDisk(suffix: String) = {
+      Files.walk(remoteIndexCacheDir.toPath)
+        .filter(Files.isRegularFile(_))
+        .filter(path => path.getFileName.toString.endsWith(suffix))
+        .forEach(f => Utils.atomicMoveWithFallback(f, 
f.resolveSibling(f.getFileName().toString().stripSuffix(tempSuffix))))
+    }
+
+    val entry = cache.getIndexEntry(rlsMetadata)
+    // copy files with temporary name
+    Files.copy(entry.offsetIndex().file().toPath(), 
Paths.get(Utils.replaceSuffix(entry.offsetIndex().file().getPath(), "", 
tempSuffix)))
+    Files.copy(entry.txnIndex().file().toPath(), 
Paths.get(Utils.replaceSuffix(entry.txnIndex().file().getPath(), "", 
tempSuffix)))
+    Files.copy(entry.timeIndex().file().toPath(), 
Paths.get(Utils.replaceSuffix(entry.timeIndex().file().getPath(), "", 
tempSuffix)))
+
+    cache.internalCache().invalidate(rlsMetadata.remoteLogSegmentId().id())
+
+    // wait until entry is marked for deletion
+    TestUtils.waitUntilTrue(() => entry.isMarkedForCleanup,
+      "Failed to mark cache entry for cleanup after invalidation")
+    TestUtils.waitUntilTrue(() => entry.isCleanStarted,
+      "Failed to cleanup cache entry after invalidation")
+
+    // restore index files
+    renameRemoteCacheIndexFileFromDisk(tempSuffix)
+    // validate cache entry for the above key should be  null

Review Comment:
   nit: additional space before null.



##########
core/src/test/scala/unit/kafka/log/remote/RemoteIndexCacheTest.scala:
##########
@@ -541,6 +541,126 @@ class RemoteIndexCacheTest {
     verifyFetchIndexInvocation(count = 1)
   }
 
+  @Test
+  def testOffsetIndexFileAlreadyExistOnDiskButNotInCache(): Unit ={
+    val remoteIndexCacheDir = new File(tpDir,RemoteIndexCache.DIR_NAME)
+    val tempSuffix = ".tmptest"
+    def getRemoteCacheIndexFileFromDisk(suffix: String) = {
+      Files.walk(remoteIndexCacheDir.toPath)
+        .filter(Files.isRegularFile(_))
+        .filter(path => path.getFileName.toString.endsWith(suffix))
+        .findAny()
+    }
+    def renameRemoteCacheIndexFileFromDisk(suffix: String) = {
+      Files.walk(remoteIndexCacheDir.toPath)
+        .filter(Files.isRegularFile(_))
+        .filter(path => path.getFileName.toString.endsWith(suffix))
+        .forEach(f => 
Files.move(f,f.resolveSibling(f.getFileName().toString().stripSuffix(tempSuffix))))
+    }
+    val entry = cache.getIndexEntry(rlsMetadata)
+    // copy files with temporary name
+    
Files.copy(entry.offsetIndex().file().toPath(),Paths.get(Utils.replaceSuffix(entry.offsetIndex().file().getPath(),"",tempSuffix)))
+    
Files.copy(entry.txnIndex().file().toPath(),Paths.get(Utils.replaceSuffix(entry.txnIndex().file().getPath(),"",tempSuffix)))
+    
Files.copy(entry.timeIndex().file().toPath(),Paths.get(Utils.replaceSuffix(entry.timeIndex().file().getPath(),"",tempSuffix)))
+
+    cache.internalCache().invalidate(rlsMetadata.remoteLogSegmentId().id())
+
+    // wait until entry is marked for deletion
+    TestUtils.waitUntilTrue(() => entry.isMarkedForCleanup,
+      "Failed to mark cache entry for cleanup after invalidation")
+    TestUtils.waitUntilTrue(() => entry.isCleanStarted,
+      "Failed to cleanup cache entry after invalidation")
+
+    // restore index files
+    renameRemoteCacheIndexFileFromDisk(tempSuffix)
+    // validate cache entry for the above key should be  null
+    
assertNull(cache.internalCache().getIfPresent(rlsMetadata.remoteLogSegmentId().id()))
+    cache.getIndexEntry(rlsMetadata)
+    // Index  Files already exist
+    // rsm should not be called again
+    // instead files exist on disk
+    // should be used
+    verifyFetchIndexInvocation(count = 1)
+    // verify index files on disk
+    
assertTrue(getRemoteCacheIndexFileFromDisk(LogFileUtils.INDEX_FILE_SUFFIX).isPresent,
 s"Offset index file should be present on disk at 
${remoteIndexCacheDir.toPath}")
+    
assertTrue(getRemoteCacheIndexFileFromDisk(LogFileUtils.TXN_INDEX_FILE_SUFFIX).isPresent,
 s"Txn index file should be present on disk at ${remoteIndexCacheDir.toPath}")
+    
assertTrue(getRemoteCacheIndexFileFromDisk(LogFileUtils.TIME_INDEX_FILE_SUFFIX).isPresent,
 s"Time index file should be present on disk at ${remoteIndexCacheDir.toPath}")
+  }
+
+  @Test
+  def testRSMReturnCorruptedIndexFile(): Unit ={
+
+    when(rsm.fetchIndex(any(classOf[RemoteLogSegmentMetadata]), 
any(classOf[IndexType])))
+      .thenAnswer(ans => {
+        val metadata = ans.getArgument[RemoteLogSegmentMetadata](0)
+        val indexType = ans.getArgument[IndexType](1)
+        val pw =  new PrintWriter(remoteOffsetIndexFile(tpDir, metadata))
+        pw.write("Hello, world")
+        // The size of the string written in the file is 12 bytes,
+        // but it should be multiple of Offset Index EntrySIZE which is equal 
to 8.
+        pw.close()
+        val offsetIdx = createOffsetIndexForSegmentMetadata(metadata)

Review Comment:
   @iit2009060, if/else is fine with me; just make the test clear.
   
   For txnIndex corruption, I usually check whether other tests already do 
something similar to what we want. Here's a good reference:
   
   
https://github.com/apache/kafka/blob/fbc39bcdbe5ebcb07435f467b77f4b7df2e910f4/core/src/test/scala/unit/kafka/log/TransactionIndexTest.scala#L62-L73



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to