abhishekrb19 commented on code in PR #15415:
URL: https://github.com/apache/druid/pull/15415#discussion_r1422758621
##########
server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java:
##########
@@ -3048,21 +3159,23 @@ private List<DataSegment>
createAndGetUsedYearSegments(final int startYear, fina
return segments;
}
- private ImmutableList<DataSegment>
retrieveUnusedSegmentsUsingMultipleIntervalsAndLimit(
+ private ImmutableList<DataSegment>
retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset(
Review Comment:
There's no offset anymore; maybe we can simply call this method
`retrieveUnusedSegments`?
##########
server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java:
##########
@@ -1206,23 +1206,86 @@ public void
testRetrieveUnusedSegmentsUsingMultipleIntervalsAndNoLimit() throws
final List<DataSegment> segments = createAndGetUsedYearSegments(1900,
2133);
markAllSegmentsUnused(new HashSet<>(segments));
- final ImmutableList<DataSegment> actualUnusedSegments =
retrieveUnusedSegmentsUsingMultipleIntervalsAndLimit(
+ final ImmutableList<DataSegment> actualUnusedSegments =
retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset(
segments.stream().map(DataSegment::getInterval).collect(Collectors.toList()),
+ null,
+ null,
+ null
+ );
+ Assert.assertEquals(segments.size(), actualUnusedSegments.size());
+ Assert.assertTrue(segments.containsAll(actualUnusedSegments));
+ }
+
+ @Test
+ public void testRetrieveUnusedSegmentsUsingNoIntervalsNoLimitNoOffset()
throws IOException
+ {
+ final List<DataSegment> segments = createAndGetUsedYearSegments(1900,
2133);
+ markAllSegmentsUnused(new HashSet<>(segments));
+
+ final ImmutableList<DataSegment> actualUnusedSegments =
retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset(
+ ImmutableList.of(),
+ null,
+ null,
null
);
Assert.assertEquals(segments.size(), actualUnusedSegments.size());
Assert.assertTrue(segments.containsAll(actualUnusedSegments));
}
+ @Test
+ public void testRetrieveUnusedSegmentsUsingNoIntervalsAndNoLimitAndOffset()
throws IOException
Review Comment:
Offset -> last segment id here and below
```suggestion
public void
testRetrieveUnusedSegmentsUsingNoIntervalsAndNoLimitAndNoLastSegmentId() throws
IOException
```
##########
server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java:
##########
@@ -1206,23 +1206,86 @@ public void
testRetrieveUnusedSegmentsUsingMultipleIntervalsAndNoLimit() throws
final List<DataSegment> segments = createAndGetUsedYearSegments(1900,
2133);
markAllSegmentsUnused(new HashSet<>(segments));
- final ImmutableList<DataSegment> actualUnusedSegments =
retrieveUnusedSegmentsUsingMultipleIntervalsAndLimit(
+ final ImmutableList<DataSegment> actualUnusedSegments =
retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset(
segments.stream().map(DataSegment::getInterval).collect(Collectors.toList()),
+ null,
+ null,
+ null
+ );
+ Assert.assertEquals(segments.size(), actualUnusedSegments.size());
+ Assert.assertTrue(segments.containsAll(actualUnusedSegments));
+ }
+
+ @Test
+ public void testRetrieveUnusedSegmentsUsingNoIntervalsNoLimitNoOffset()
throws IOException
Review Comment:
Adjust the test method names since there's no offset?
##########
server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java:
##########
@@ -955,6 +956,50 @@ public Optional<Iterable<DataSegment>>
iterateAllUsedNonOvershadowedSegmentsForD
.transform(timeline ->
timeline.findNonOvershadowedObjectsInInterval(interval,
Partitions.ONLY_COMPLETE));
}
+ /**
+ * Retrieves segments for a given datasource that are marked unused and that
are *fully contained by* an optionally
+ * specified interval. If the interval specified is null, this method will
retrieve all unused segments.
+ *
+ * This call does not return any information about realtime segments.
+ *
+ * @param datasource The name of the datasource
+ * @param interval The intervals to search over
+ * @param limit The limit of segments to return
Review Comment:
```suggestion
* @param limit an optional maximum number of results to return.
If none is specified, the results are not limited.
```
##########
server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java:
##########
@@ -955,6 +956,50 @@ public Optional<Iterable<DataSegment>>
iterateAllUsedNonOvershadowedSegmentsForD
.transform(timeline ->
timeline.findNonOvershadowedObjectsInInterval(interval,
Partitions.ONLY_COMPLETE));
}
+ /**
+ * Retrieves segments for a given datasource that are marked unused and that
are *fully contained by* an optionally
+ * specified interval. If the interval specified is null, this method will
retrieve all unused segments.
+ *
+ * This call does not return any information about realtime segments.
+ *
+ * @param datasource The name of the datasource
+ * @param interval The intervals to search over
Review Comment:
```suggestion
* @param interval an optional interval to search over.
```
##########
server/src/main/java/org/apache/druid/server/http/MetadataResource.java:
##########
@@ -334,6 +337,49 @@ public Response getUsedSegmentsInDataSourceForIntervals(
return builder.entity(Collections2.transform(segments,
DataSegment::getId)).build();
}
+ @GET
+ @Path("/datasources/{dataSourceName}/unusedSegments")
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response getUnusedSegmentsInDataSource(
+ @Context final HttpServletRequest req,
+ @PathParam("dataSourceName") final String dataSource,
+ @QueryParam("interval") @Nullable String interval,
+ @QueryParam("limit") @Nullable Integer limit,
+ @QueryParam("lastSegmentId") @Nullable final String lastSegmentId,
+ @QueryParam("sortOrder") @Nullable final String sortOrder
+ )
+ {
+ if (dataSource == null || dataSource.isEmpty()) {
+ throw InvalidInput.exception("dataSourceName must be non-empty");
+ }
+ if (limit != null && limit < 0) {
+ throw InvalidInput.exception("Invalid limit[%s] specified. Limit must be
> 0", limit);
+ }
+
+ SortOrder theSortOrder = sortOrder == null ? null :
SortOrder.fromValue(sortOrder);
+
Review Comment:
Could we also add a validation check for `lastSegmentId` if it's non-nul? We
could use `org.apache.druid.timeline.SegmentId#tryParse` utility to validate
the id and throw an invalid input exception if the result of the `tryParse` is
null (the id failed to parse)
##########
server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java:
##########
@@ -236,6 +248,119 @@ public void testGetAllSegmentsIncludingRealtime()
Assert.assertEquals(new SegmentStatusInCluster(realTimeSegments[1], false,
null, 40L, true), resultList.get(5));
}
+ @Test
+ public void testGetUnusedSegmentsInDataSource()
+ {
+ Mockito.doAnswer(mockIterateAllUnusedSegmentsForDatasource())
+ .when(segmentsMetadataManager)
+ .iterateAllUnusedSegmentsForDatasource(
+ ArgumentMatchers.any(),
+ ArgumentMatchers.any(),
+ ArgumentMatchers.any(),
+ ArgumentMatchers.any(),
+ ArgumentMatchers.any());
+
+ // test with null datasource name - fails with expected bad datasource
name error
+ DruidExceptionMatcher.invalidInput().expectMessageIs(
+ "dataSourceName must be non-empty"
+ ).assertThrowsAndMatches(
+ () -> metadataResource.getUnusedSegmentsInDataSource(request, null,
null, null, null, null)
+ );
+
+ // test with empty datasource name - fails with expected bad datasource
name error
+ DruidExceptionMatcher.invalidInput().expectMessageIs(
+ "dataSourceName must be non-empty"
+ ).assertThrowsAndMatches(
+ () -> metadataResource.getUnusedSegmentsInDataSource(request, "",
null, null, null, null)
+ );
+
+ // test invalid datasource - returns empty segments
+ Response response = metadataResource.getUnusedSegmentsInDataSource(
+ request,
+ "invalid_datasource",
+ null,
+ null,
+ null,
+ null
+ );
+ List<DataSegment> resultList = extractResponseList(response);
+ Assert.assertTrue(resultList.isEmpty());
+
+ // test valid datasource with bad limit - fails with expected bad limit
message
+ DruidExceptionMatcher.invalidInput().expectMessageIs(
+ StringUtils.format("Invalid limit[%s] specified. Limit must be > 0",
-1)
+ ).assertThrowsAndMatches(
+ () -> metadataResource.getUnusedSegmentsInDataSource(request,
DATASOURCE1, null, -1, null, null)
+ );
+
+ // test valid datasource - returns all unused segments for that datasource
+ response = metadataResource.getUnusedSegmentsInDataSource(request,
DATASOURCE1, null, null, null, null);
+
+ resultList = extractResponseList(response);
+ Assert.assertEquals(Arrays.asList(segments), resultList);
+
+ // test valid datasource with interval filter - returns all unused
segments for that datasource within interval
+ int numDays = 2;
+ String interval = SEGMENT_START_INTERVAL + "_P" + numDays + "D";
+ response = metadataResource.getUnusedSegmentsInDataSource(request,
DATASOURCE1, interval, null, null, null);
+
+ resultList = extractResponseList(response);
+ Assert.assertEquals(NUM_PARTITIONS * numDays, resultList.size());
+ Assert.assertEquals(Arrays.asList(segments[0], segments[1], segments[2],
segments[3]), resultList);
+
+ // test valid datasource with interval filter and limit - returns unused
segments for that datasource within
+ // interval upto limit
+ int limit = 3;
+ response = metadataResource.getUnusedSegmentsInDataSource(request,
DATASOURCE1, interval, limit, null, null);
+
+ resultList = extractResponseList(response);
+ Assert.assertEquals(limit, resultList.size());
+ Assert.assertEquals(Arrays.asList(segments[0], segments[1], segments[2]),
resultList);
+
+ // test valid datasource with interval filter limit and offset - returns
unused segments for that datasource within
+ // interval upto limit starting at offset
Review Comment:
```suggestion
// test valid datasource with interval filter limit and last segment id
- returns unused segments for that datasource within
// interval upto limit starting at last segment id
```
##########
server/src/main/java/org/apache/druid/server/http/MetadataResource.java:
##########
@@ -334,6 +337,49 @@ public Response getUsedSegmentsInDataSourceForIntervals(
return builder.entity(Collections2.transform(segments,
DataSegment::getId)).build();
}
+ @GET
+ @Path("/datasources/{dataSourceName}/unusedSegments")
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response getUnusedSegmentsInDataSource(
+ @Context final HttpServletRequest req,
+ @PathParam("dataSourceName") final String dataSource,
+ @QueryParam("interval") @Nullable String interval,
+ @QueryParam("limit") @Nullable Integer limit,
+ @QueryParam("lastSegmentId") @Nullable final String lastSegmentId,
+ @QueryParam("sortOrder") @Nullable final String sortOrder
+ )
+ {
+ if (dataSource == null || dataSource.isEmpty()) {
+ throw InvalidInput.exception("dataSourceName must be non-empty");
+ }
+ if (limit != null && limit < 0) {
+ throw InvalidInput.exception("Invalid limit[%s] specified. Limit must be
> 0", limit);
+ }
+
+ SortOrder theSortOrder = sortOrder == null ? null :
SortOrder.fromValue(sortOrder);
+
+ final Interval theInterval = interval != null ?
Intervals.of(interval.replace('_', '/')) : null;
+ Iterable<DataSegment> unusedSegments =
segmentsMetadataManager.iterateAllUnusedSegmentsForDatasource(
+ dataSource,
+ theInterval,
+ limit,
+ lastSegmentId,
+ theSortOrder
+ );
+
+ final Function<DataSegment, Iterable<ResourceAction>> raGenerator =
segment -> Collections.singletonList(
+
AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(segment.getDataSource()));
+
+ final Iterable<DataSegment> authorizedSegments =
+ AuthorizationUtils.filterAuthorizedResources(req, unusedSegments,
raGenerator, authorizerMapper);
+
+ // sort by earliest start interval first, then end interval. DataSegment
are sorted in this same order due to
Review Comment:
Do we need this comment here since the sorting actually happens inside
`iterateAllUnusedSegmentsForDatasource`? Also, this is missing last segment id
in the sort by criteria
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]