This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 321111595df Add support for SNAPSHOT_WITH_DELETE validDocIdsType to server APIs. (#16726) 321111595df is described below commit 321111595dfae6828ce3a73513355bdcb6181d1d Author: Abhishek Bafna <aba...@startree.ai> AuthorDate: Wed Sep 3 08:24:26 2025 +0530 Add support for SNAPSHOT_WITH_DELETE validDocIdsType to server APIs. (#16726) * Add support for SNAPSHOT_WITH_DELETE validDocIdsType to server APIs. * removing some redundant tests. --------- Co-authored-by: abhishekbafna <abhishek.ba...@startree.ai> --- .../common/restlet/resources/ValidDocIdsType.java | 5 + .../pinot/server/api/resources/TablesResource.java | 4 + .../apache/pinot/server/api/BaseResourceTest.java | 7 +- .../pinot/server/api/TablesResourceTest.java | 134 +++++++++++++++++++-- .../test/resources/data/test_data_with_delete.avro | Bin 0 -> 15615184 bytes 5 files changed, 138 insertions(+), 12 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/ValidDocIdsType.java b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/ValidDocIdsType.java index a8fbb129dc1..3ffb94e69d6 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/ValidDocIdsType.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/ValidDocIdsType.java @@ -23,6 +23,11 @@ public enum ValidDocIdsType { // Pinot segment. UpsertConfig's 'enableSnapshot' must be enabled for this type. SNAPSHOT, + // This indicates that the validDocIds bitmap is loaded from the snapshot from the Pinot segment. + // The valid document ids here do take the deleted records into account. UpsertConfig's 'enableSnapshot' must be + // enabled for this type. UpsertConfig's 'deleteRecordColumn' must be provided for this type. + SNAPSHOT_WITH_DELETE, + // This indicates that the validDocIds bitmap is loaded from the real-time server's in-memory. 
// // NOTE: Using in-memory based validDocids bitmap is a bit dangerous as it will not give us the consistency in some diff --git a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java index d8493ebaf66..a2a2e691141 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java @@ -761,6 +761,10 @@ public class TablesResource { case SNAPSHOT: return Pair.of(validDocIdsType, ((ImmutableSegmentImpl) indexSegment).loadDocIdsFromSnapshot(V1Constants.VALID_DOC_IDS_SNAPSHOT_FILE_NAME)); + case SNAPSHOT_WITH_DELETE: + return Pair.of(validDocIdsType, + ((ImmutableSegmentImpl) indexSegment).loadDocIdsFromSnapshot( + V1Constants.QUERYABLE_DOC_IDS_SNAPSHOT_FILE_NAME)); case IN_MEMORY: return Pair.of(validDocIdsType, indexSegment.getValidDocIds().getMutableRoaringBitmap()); case IN_MEMORY_WITH_DELETE: diff --git a/pinot-server/src/test/java/org/apache/pinot/server/api/BaseResourceTest.java b/pinot-server/src/test/java/org/apache/pinot/server/api/BaseResourceTest.java index 93b61911145..b3c194954f7 100644 --- a/pinot-server/src/test/java/org/apache/pinot/server/api/BaseResourceTest.java +++ b/pinot-server/src/test/java/org/apache/pinot/server/api/BaseResourceTest.java @@ -75,7 +75,10 @@ import static org.testng.Assert.assertTrue; public abstract class BaseResourceTest { - private static final String AVRO_DATA_PATH = "data/test_data-mv.avro"; + protected String getAvroFileName() { + return "data/test_data-mv.avro"; + } + private static final File TEMP_DIR = new File(FileUtils.getTempDirectory(), "BaseResourceTest"); protected static final String TABLE_NAME = "testTable"; protected static final String LLC_SEGMENT_NAME_FOR_UPLOAD_SUCCESS = @@ -104,7 +107,7 @@ public abstract class BaseResourceTest { FileUtils.deleteQuietly(TEMP_DIR); 
assertTrue(TEMP_DIR.mkdirs()); - URL resourceUrl = getClass().getClassLoader().getResource(AVRO_DATA_PATH); + URL resourceUrl = getClass().getClassLoader().getResource(getAvroFileName()); assertNotNull(resourceUrl); _avroFile = new File(resourceUrl.getFile()); diff --git a/pinot-server/src/test/java/org/apache/pinot/server/api/TablesResourceTest.java b/pinot-server/src/test/java/org/apache/pinot/server/api/TablesResourceTest.java index 2ed2af463d3..42c78660142 100644 --- a/pinot-server/src/test/java/org/apache/pinot/server/api/TablesResourceTest.java +++ b/pinot-server/src/test/java/org/apache/pinot/server/api/TablesResourceTest.java @@ -213,10 +213,10 @@ public class TablesResourceTest extends BaseResourceTest { Assert.assertEquals(jsonResponse.get("columns").size(), 2); Assert.assertEquals(jsonResponse.get("indexes").size(), 2); Assert.assertNotNull(jsonResponse.get("columns").get(0).get("indexSizeMap")); - Assert.assertEquals(jsonResponse.get("columns").get(0).get("indexSizeMap").get("forward_index").asText(), "200008"); + Assert.assertEquals(jsonResponse.get("columns").get(0).get("indexSizeMap").get("forward_index").asText(), "400008"); Assert.assertEquals(jsonResponse.get("columns").get(0).get("indexSizeMap").get("dictionary").asText(), "206384"); Assert.assertNotNull(jsonResponse.get("columns").get(1).get("indexSizeMap")); - Assert.assertEquals(jsonResponse.get("columns").get(1).get("indexSizeMap").get("forward_index").asText(), "200008"); + Assert.assertEquals(jsonResponse.get("columns").get(1).get("indexSizeMap").get("forward_index").asText(), "400008"); Assert.assertEquals(jsonResponse.get("columns").get(1).get("indexSizeMap").get("dictionary").asText(), "168976"); Assert.assertEquals(jsonResponse.get("indexes").get("column1").get("h3-index").asText(), "NO"); Assert.assertEquals(jsonResponse.get("indexes").get("column1").get("fst-index").asText(), "NO"); @@ -320,10 +320,10 @@ public class TablesResourceTest extends BaseResourceTest { .get(String.class); 
JsonNode validDocIdMetadata = JsonUtils.stringToJsonNode(metadataResponse).get(0); - Assert.assertEquals(validDocIdMetadata.get("totalDocs").asInt(), 100000); + Assert.assertEquals(validDocIdMetadata.get("totalDocs").asInt(), 200000); Assert.assertEquals(validDocIdMetadata.get("totalValidDocs").asInt(), 8); - Assert.assertEquals(validDocIdMetadata.get("totalInvalidDocs").asInt(), 99992); - Assert.assertEquals(validDocIdMetadata.get("segmentCrc").asText(), "1894900283"); + Assert.assertEquals(validDocIdMetadata.get("totalInvalidDocs").asInt(), 199992); + Assert.assertEquals(validDocIdMetadata.get("segmentCrc").asText(), "187068486"); Assert.assertEquals(validDocIdMetadata.get("validDocIdsType").asText(), "SNAPSHOT"); } @@ -346,12 +346,51 @@ public class TablesResourceTest extends BaseResourceTest { .post(Entity.json(tableSegments), String.class); JsonNode validDocIdsMetadata = JsonUtils.stringToJsonNode(response).get(0); - Assert.assertEquals(validDocIdsMetadata.get("totalDocs").asInt(), 100000); + Assert.assertEquals(validDocIdsMetadata.get("totalDocs").asInt(), 200000); Assert.assertEquals(validDocIdsMetadata.get("totalValidDocs").asInt(), 8); - Assert.assertEquals(validDocIdsMetadata.get("totalInvalidDocs").asInt(), 99992); - Assert.assertEquals(validDocIdsMetadata.get("segmentCrc").asText(), "1894900283"); + Assert.assertEquals(validDocIdsMetadata.get("totalInvalidDocs").asInt(), 199992); + Assert.assertEquals(validDocIdsMetadata.get("segmentCrc").asText(), "187068486"); Assert.assertEquals(validDocIdsMetadata.get("validDocIdsType").asText(), "SNAPSHOT"); - Assert.assertEquals(validDocIdsMetadata.get("segmentSizeInBytes").asLong(), 1877636); + Assert.assertEquals(validDocIdsMetadata.get("segmentSizeInBytes").asLong(), 4514723); + Assert.assertTrue(validDocIdsMetadata.has("segmentCreationTimeMillis")); + Assert.assertTrue(validDocIdsMetadata.get("segmentCreationTimeMillis").asLong() > 0); + + // Verify server status information + 
Assert.assertTrue(validDocIdsMetadata.has("serverStatus"), "Server status should be included in response"); + String serverStatus = validDocIdsMetadata.get("serverStatus").asText(); + Assert.assertNotNull(serverStatus, "Server status should not be null"); + Assert.assertEquals(serverStatus, "NOT_STARTED", serverStatus); + } + + @Test + public void testValidDocIdsMetadataPostForSnapshotWithDelete() + throws IOException { + IndexSegment segment = _realtimeIndexSegments.get(0); + // Verify the content of the downloaded snapshot from a realtime table. + downLoadAndVerifyValidDocIdsSnapshot(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME), + (ImmutableSegmentImpl) segment); + downLoadAndVerifyValidDocIdsSnapshotBitmap(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME), + (ImmutableSegmentImpl) segment); + + List<String> segments = List.of(segment.getSegmentName()); + TableSegments tableSegments = new TableSegments(segments); + String validDocIdsMetadataPath = + "/tables/" + TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME) + "/validDocIdsMetadata"; + + // Test the new SNAPSHOT_WITH_DELETE validDocIdsType + String response = _webTarget.path(validDocIdsMetadataPath) + .queryParam("segmentNames", segment.getSegmentName()) + .queryParam("validDocIdsType", ValidDocIdsType.SNAPSHOT_WITH_DELETE.toString()) + .request() + .post(Entity.json(tableSegments), String.class); + JsonNode validDocIdsMetadata = JsonUtils.stringToJsonNode(response).get(0); + + Assert.assertEquals(validDocIdsMetadata.get("totalDocs").asInt(), 200000); + Assert.assertEquals(validDocIdsMetadata.get("totalValidDocs").asInt(), 8); + Assert.assertEquals(validDocIdsMetadata.get("totalInvalidDocs").asInt(), 199992); + Assert.assertEquals(validDocIdsMetadata.get("segmentCrc").asText(), "187068486"); + Assert.assertEquals(validDocIdsMetadata.get("validDocIdsType").asText(), "SNAPSHOT_WITH_DELETE"); + Assert.assertEquals(validDocIdsMetadata.get("segmentSizeInBytes").asLong(), 4514723); 
Assert.assertTrue(validDocIdsMetadata.has("segmentCreationTimeMillis")); Assert.assertTrue(validDocIdsMetadata.get("segmentCreationTimeMillis").asLong() > 0); @@ -413,6 +452,13 @@ public class TablesResourceTest extends BaseResourceTest { FileUtils.writeByteArrayToFile(validDocIdsSnapshotFile, RoaringBitmapUtils.serialize(validDocIdsSnapshot.getMutableRoaringBitmap())); + // Create the queryableDocIds snapshot file needed for SNAPSHOT_WITH_DELETE + File queryableDocIdsSnapshotFile = + new File(SegmentDirectoryPaths.findSegmentDirectory(segment.getSegmentMetadata().getIndexDir()), + V1Constants.QUERYABLE_DOC_IDS_SNAPSHOT_FILE_NAME); + FileUtils.writeByteArrayToFile(queryableDocIdsSnapshotFile, + RoaringBitmapUtils.serialize(queryableDocIds.getMutableRoaringBitmap())); + // Check no type (default should be validDocIdsSnapshot) Response response = _webTarget.path(snapshotPath).request().get(Response.class); Assert.assertEquals(response.getStatus(), Response.Status.OK.getStatusCode()); @@ -472,7 +518,15 @@ public class TablesResourceTest extends BaseResourceTest { V1Constants.VALID_DOC_IDS_SNAPSHOT_FILE_NAME); FileUtils.writeByteArrayToFile(validDocIdsSnapshotFile, RoaringBitmapUtils.serialize(validDocIdsSnapshot.getMutableRoaringBitmap())); - String expectedSegmentCrc = "1894900283"; + + // Create the queryableDocIds snapshot file needed for SNAPSHOT_WITH_DELETE + File queryableDocIdsSnapshotFile = + new File(SegmentDirectoryPaths.findSegmentDirectory(segment.getSegmentMetadata().getIndexDir()), + V1Constants.QUERYABLE_DOC_IDS_SNAPSHOT_FILE_NAME); + FileUtils.writeByteArrayToFile(queryableDocIdsSnapshotFile, + RoaringBitmapUtils.serialize(queryableDocIds.getMutableRoaringBitmap())); + + String expectedSegmentCrc = "187068486"; // Check no type (default should be validDocIdsSnapshot) ValidDocIdsBitmapResponse response = _webTarget.path(snapshotPath).request().get(ValidDocIdsBitmapResponse.class); @@ -521,6 +575,60 @@ public class TablesResourceTest extends 
BaseResourceTest { queryableDocIds.getMutableRoaringBitmap()); } + @Test + public void testValidDocIdsMetadataGetForSnapshotWithDelete() + throws IOException { + IndexSegment segment = _realtimeIndexSegments.get(0); + // Verify the content of the downloaded snapshot from a realtime table. + downLoadAndVerifyValidDocIdsSnapshot(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME), + (ImmutableSegmentImpl) segment); + downLoadAndVerifyValidDocIdsSnapshotBitmap(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME), + (ImmutableSegmentImpl) segment); + + String validDocIdsMetadataPath = + "/tables/" + TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME) + "/validDocIdMetadata"; + + // Test GET endpoint with SNAPSHOT_WITH_DELETE validDocIdsType + String response = _webTarget.path(validDocIdsMetadataPath) + .queryParam("segmentNames", segment.getSegmentName()) + .queryParam("validDocIdsType", ValidDocIdsType.SNAPSHOT_WITH_DELETE.toString()) + .request() + .get(String.class); + JsonNode validDocIdsMetadata = JsonUtils.stringToJsonNode(response).get(0); + + Assert.assertEquals(validDocIdsMetadata.get("totalDocs").asInt(), 200000); + Assert.assertEquals(validDocIdsMetadata.get("totalValidDocs").asInt(), 8); + Assert.assertEquals(validDocIdsMetadata.get("totalInvalidDocs").asInt(), 199992); + Assert.assertEquals(validDocIdsMetadata.get("segmentCrc").asText(), "187068486"); + Assert.assertEquals(validDocIdsMetadata.get("validDocIdsType").asText(), "SNAPSHOT_WITH_DELETE"); + } + + @Test + public void testValidDocIdsBitmapForSnapshotWithDelete() + throws IOException { + IndexSegment segment = _realtimeIndexSegments.get(0); + // Verify the content of the downloaded snapshot from a realtime table. 
+ downLoadAndVerifyValidDocIdsSnapshot(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME), + (ImmutableSegmentImpl) segment); + downLoadAndVerifyValidDocIdsSnapshotBitmap(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME), + (ImmutableSegmentImpl) segment); + + String validDocIdsBitmapPath = "/segments/" + TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME) + + "/" + segment.getSegmentName() + "/validDocIdsBitmap"; + + // Test validDocIdsBitmap endpoint with SNAPSHOT_WITH_DELETE validDocIdsType + ValidDocIdsBitmapResponse response = _webTarget.path(validDocIdsBitmapPath) + .queryParam("validDocIdsType", ValidDocIdsType.SNAPSHOT_WITH_DELETE.toString()) + .request() + .get(ValidDocIdsBitmapResponse.class); + + Assert.assertNotNull(response); + Assert.assertEquals(response.getSegmentCrc(), "187068486"); + Assert.assertEquals(response.getSegmentName(), segment.getSegmentName()); + Assert.assertEquals(response.getValidDocIdsType(), ValidDocIdsType.SNAPSHOT_WITH_DELETE); + Assert.assertNotNull(response.getBitmap()); + } + @Test public void testUploadSegments() throws Exception { @@ -602,4 +710,10 @@ public class TablesResourceTest extends BaseResourceTest { .request().get(Response.class); Assert.assertEquals(response.getStatus(), Response.Status.NOT_FOUND.getStatusCode()); } + + // Override to use data with delete records + @Override + protected String getAvroFileName() { + return "data/test_data_with_delete.avro"; + } } diff --git a/pinot-server/src/test/resources/data/test_data_with_delete.avro b/pinot-server/src/test/resources/data/test_data_with_delete.avro new file mode 100644 index 00000000000..96616430d60 Binary files /dev/null and b/pinot-server/src/test/resources/data/test_data_with_delete.avro differ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org