gianm commented on code in PR #19496: URL: https://github.com/apache/druid/pull/19496#discussion_r3291591849
########## server/src/main/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrap.java: ########## @@ -0,0 +1,393 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.segment.file.SegmentFileContainerMetadata; +import org.apache.druid.segment.file.SegmentFileMetadata; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.timeline.SegmentId; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +/** + * Bootstraps partial-segment cache entries from existing on-disk state. Called by the cache manager on historical + * startup for each segment directory that contains the partial-download layout (`{targetFilename}.header` plus one + * or more `{targetFilename}.container.NNNNN` files). + * <p> + * The bootstrap is read-only with respect to deep storage; it never issues a range read. The on-disk header file is + * parsed in-place by {@link PartialSegmentFileMapperV10#create} (which detects header corruption and, for that one + * case, may delete the local copy; bootstrap callers should treat that as "no restorable state" and fall back to a + * cold start). Bundle entries are registered as weak entries on the storage location, mounted (which sparse-allocates + * any container files that weren't already present and re-establishes parent holds), and returned to the caller. + * <p> + * Parent-set inference is delegated to {@link PartialSegmentMetadataCacheEntry#inferParentBundles}. A bundle whose + * inferred parent isn't itself present on disk is treated as <i>orphaned</i>: its on-disk container files are deleted + * (via {@link PartialSegmentFileMapperV10#evictContainer}, which also clears the relevant bitmap bits) and the bundle + * is not restored. The next access through the cache manager acquire path then triggers a clean cold re-fetch, the + * same fall-back as when the cache manager finds a segment listed in the info directory but missing on disk. + */ +public final class PartialSegmentCacheBootstrap +{ + private static final EmittingLogger LOG = new EmittingLogger(PartialSegmentCacheBootstrap.class); + + /** + * Restore a single partial segment's cache entries from its local on-disk layout. + * + * @param segmentId the segment whose entries are being restored + * @param localCacheDir the per-segment directory containing the header + container files + * @param targetFilename the V10 entry-point filename + * @param externalFilenames any external segment file names that were registered as children of the entry-point + * file + * @param jsonMapper used to parse the header + * @param location the storage location these entries belong to; the metadata entry is registered as + * static and bundle entries are registered as weak + * @throws IllegalStateException if the expected header file is missing or unreadable Review Comment: seems to actually throw `DruidException` ########## processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java: ########## @@ -384,6 +485,92 @@ private void ensureFileDownloaded(String name, SegmentInternalFileMetadata fileM } } + /** + * Public entry point for cache-layer code that wants to ensure a container is materialized before any data is + * downloaded into it (e.g. when a per-bundle cache entry is mounted, the entry pre-allocates its container files + * so that subsequent {@link #mapFile} calls have somewhere to write into and the cache layer can charge the + * reservation up front). + */ + public void initializeContainer(int containerIndex) throws IOException + { + checkClosed(); + ensureContainerInitialized(containerIndex); + } + + /** + * Reverse of {@link #initializeContainer(int)}: unmap the in-memory view of the container, delete the local + * container file, and clear the bitmap bits + {@link #downloadedFiles} entries for every internal file that lived + * in this container. + * <p> + * Used by per-bundle cache entries on unmount/eviction to release the disk and memory footprint of one bundle + * without affecting other bundles sharing the same {@link PartialSegmentFileMapperV10}. After eviction, subsequent + * {@link #mapFile} calls for files in this container will re-trigger downloads via {@link #initializeContainer} + * and the bitmap will be repopulated incrementally. + * <p> + * No-op if the container has not been initialized. + */ + public void evictContainer(int containerIndex) + { + checkClosed(); + containerLocks[containerIndex].lock(); + try { + final MappedByteBuffer existing = containers[containerIndex]; + if (existing != null) { + ByteBufferUtils.unmap(existing); + containers[containerIndex] = null; + } + // Try the cached containerFiles[i] first. If it's null, the container was never initialized in this mapper + // instance (typical right after create() with an empty bitmap), but the on-disk file may still exist from a + // previous run. Fall back to the deterministic path so eviction is always effective. + File containerFile = containerFiles[containerIndex]; + if (containerFile == null) { + containerFile = new File( + localCacheDir, + StringUtils.format("%s.container.%05d", targetFilename, containerIndex) + ); + } + if (containerFile.exists()) { + containerFile.delete(); Review Comment: `delete()` returns `false` when it fails. Is ignoring it (as is done here) good, or should we throw an exception or do something else? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
