cryptoe commented on code in PR #16676:
URL: https://github.com/apache/druid/pull/16676#discussion_r1673527053


##########
server/src/main/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCache.java:
##########
@@ -419,6 +541,94 @@ private Set<SegmentId> filterSegmentWithCachedSchema(Set<SegmentId> segmentIds)
     return cachedSegments;
   }
 
+  @Nullable
+  private Integer getReplicationFactor(SegmentId segmentId)
+  {
+    if (segmentReplicationStatus == null) {
+      return null;
+    }
+    SegmentReplicaCount replicaCountsInCluster = segmentReplicationStatus.getReplicaCountsInCluster(segmentId);
+    return replicaCountsInCluster == null ? null : replicaCountsInCluster.required();
+  }
+
+  @VisibleForTesting
+  protected void coldDatasourceSchemaExec()
+  {
+    Stopwatch stopwatch = Stopwatch.createStarted();
+
+    Set<String> dataSourceWithColdSegmentSet = new HashSet<>();
+
+    int datasources = 0;
+    int segments = 0;
+    int dataSourceWithColdSegments = 0;
+
+    Collection<ImmutableDruidDataSource> immutableDataSources =
+        sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments();
+
+    for (ImmutableDruidDataSource dataSource : immutableDataSources) {
+      datasources++;
+      Collection<DataSegment> dataSegments = dataSource.getSegments();
+
+      final Map<String, ColumnType> columnTypes = new LinkedHashMap<>();
+
+      for (DataSegment segment : dataSegments) {
+        Integer replicationFactor = getReplicationFactor(segment.getId());
+        if (replicationFactor != null && replicationFactor != 0) {
+          continue;
+        }
+        Optional<SchemaPayloadPlus> optionalSchema = segmentSchemaCache.getSchemaForSegment(segment.getId());
+        if (optionalSchema.isPresent()) {
+          RowSignature rowSignature = optionalSchema.get().getSchemaPayload().getRowSignature();
+          mergeRowSignature(columnTypes, rowSignature);
+        }
+        segments++;
+      }
+
+      if (columnTypes.isEmpty()) {
+        // this datasource doesn't have any cold segment
+        continue;
+      }
+
+      final RowSignature.Builder builder = RowSignature.builder();
+      columnTypes.forEach(builder::add);
+
+      RowSignature coldSignature = builder.build();
+
+      String dataSourceName = dataSource.getName();
+      dataSourceWithColdSegmentSet.add(dataSourceName);
+      dataSourceWithColdSegments++;
+
+      log.debug("[%s] signature from cold segments is [%s]", dataSourceName, coldSignature);
+
+      coldSchemaTable.put(dataSourceName, new DataSourceInformation(dataSourceName, coldSignature));
+    }
+
+    // remove any stale datasource from the map
+    coldSchemaTable.keySet().retainAll(dataSourceWithColdSegmentSet);
+
+    String executionStatsLog = StringUtils.format(
+        "Cold schema processing was slow, taking [%d] millis. "
+        + "Processed [%d] datasources, [%d] segments & [%d] datasourceWithColdSegments.",

Review Comment:
   ```suggestion
           + "Processed total [%d] datasources, [%d] segments. Found [%d] datasources with cold segments.",
   ```
   



##########
server/src/main/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCache.java:
##########
@@ -419,6 +541,94 @@ private Set<SegmentId> filterSegmentWithCachedSchema(Set<SegmentId> segmentIds)
     return cachedSegments;
   }
 
+  @Nullable
+  private Integer getReplicationFactor(SegmentId segmentId)
+  {
+    if (segmentReplicationStatus == null) {
+      return null;
+    }
+    SegmentReplicaCount replicaCountsInCluster = segmentReplicationStatus.getReplicaCountsInCluster(segmentId);
+    return replicaCountsInCluster == null ? null : replicaCountsInCluster.required();
+  }
+
+  @VisibleForTesting
+  protected void coldDatasourceSchemaExec()
+  {
+    Stopwatch stopwatch = Stopwatch.createStarted();
+
+    Set<String> dataSourceWithColdSegmentSet = new HashSet<>();
+
+    int datasources = 0;
+    int segments = 0;
+    int dataSourceWithColdSegments = 0;
+
+    Collection<ImmutableDruidDataSource> immutableDataSources =
+        sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments();
+
+    for (ImmutableDruidDataSource dataSource : immutableDataSources) {
+      datasources++;
+      Collection<DataSegment> dataSegments = dataSource.getSegments();
+
+      final Map<String, ColumnType> columnTypes = new LinkedHashMap<>();
+
+      for (DataSegment segment : dataSegments) {
+        Integer replicationFactor = getReplicationFactor(segment.getId());
+        if (replicationFactor != null && replicationFactor != 0) {
+          continue;
+        }
+        Optional<SchemaPayloadPlus> optionalSchema = segmentSchemaCache.getSchemaForSegment(segment.getId());
+        if (optionalSchema.isPresent()) {
+          RowSignature rowSignature = optionalSchema.get().getSchemaPayload().getRowSignature();
+          mergeRowSignature(columnTypes, rowSignature);
+        }
+        segments++;
+      }
+
+      if (columnTypes.isEmpty()) {
+        // this datasource doesn't have any cold segment
+        continue;
+      }
+
+      final RowSignature.Builder builder = RowSignature.builder();
+      columnTypes.forEach(builder::add);
+
+      RowSignature coldSignature = builder.build();
+
+      String dataSourceName = dataSource.getName();
+      dataSourceWithColdSegmentSet.add(dataSourceName);
+      dataSourceWithColdSegments++;
+
+      log.debug("[%s] signature from cold segments is [%s]", dataSourceName, coldSignature);
+
+      coldSchemaTable.put(dataSourceName, new DataSourceInformation(dataSourceName, coldSignature));
+    }
+
+    // remove any stale datasource from the map
+    coldSchemaTable.keySet().retainAll(dataSourceWithColdSegmentSet);
+
+    String executionStatsLog = StringUtils.format(
+        "Cold schema processing was slow, taking [%d] millis. "

Review Comment:
   ```suggestion
           "Cold schema processing took [%d] millis. "
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to