wchevreuil commented on code in PR #6250:
URL: https://github.com/apache/hbase/pull/6250#discussion_r1768478288
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java:
##########
@@ -1626,52 +1628,33 @@ private void parsePB(BucketCacheProtos.BucketCacheEntry
proto) throws IOExceptio
}
private void persistChunkedBackingMap(FileOutputStream fos) throws
IOException {
- long numChunks = backingMap.size() / persistenceChunkSize;
- if (backingMap.size() % persistenceChunkSize != 0) {
- numChunks += 1;
- }
-
LOG.debug(
"persistToFile: before persisting backing map size: {}, "
- + "fullycachedFiles size: {}, chunkSize: {}, numberofChunks: {}",
- backingMap.size(), fullyCachedFiles.size(), persistenceChunkSize,
numChunks);
+ + "fullycachedFiles size: {}, chunkSize: {}", backingMap.size(),
fullyCachedFiles.size(),
+ persistenceChunkSize);
- BucketProtoUtils.serializeAsPB(this, fos, persistenceChunkSize, numChunks);
+ BucketProtoUtils.serializeAsPB(this, fos, persistenceChunkSize);
LOG.debug(
- "persistToFile: after persisting backing map size: {}, "
- + "fullycachedFiles size: {}, numChunksPersisteed: {}",
- backingMap.size(), fullyCachedFiles.size(), numChunks);
+ "persistToFile: after persisting backing map size: {}, " +
"fullycachedFiles size: {}",
+ backingMap.size(), fullyCachedFiles.size());
}
- private void retrieveChunkedBackingMap(FileInputStream in, int[]
bucketSizes) throws IOException {
- byte[] bytes = new byte[Long.BYTES];
- int readSize = in.read(bytes);
- if (readSize != Long.BYTES) {
- throw new IOException("Invalid size of chunk-size read from persistence:
" + readSize);
- }
- long batchSize = Bytes.toLong(bytes, 0);
-
- readSize = in.read(bytes);
- if (readSize != Long.BYTES) {
- throw new IOException("Invalid size for number of chunks read from
persistence: " + readSize);
- }
- long numChunks = Bytes.toLong(bytes, 0);
-
- LOG.info("Number of chunks: {}, chunk size: {}", numChunks, batchSize);
+ private void retrieveChunkedBackingMap(FileInputStream in) throws
IOException {
// Read the first chunk that has all the details.
BucketCacheProtos.BucketCacheEntry firstChunk =
BucketCacheProtos.BucketCacheEntry.parseDelimitedFrom(in);
parseFirstChunk(firstChunk);
// Subsequent chunks have the backingMap entries.
- for (int i = 1; i < numChunks; i++) {
- LOG.info("Reading chunk no: {}", i + 1);
+ int numChunks = 0;
+ while (in.available() > 0) {
parseChunkPB(BucketCacheProtos.BackingMap.parseDelimitedFrom(in),
firstChunk.getDeserializersMap());
- LOG.info("Retrieved chunk: {}", i + 1);
+ numChunks++;
}
Review Comment:
If we change the way we write to the file as I suggested, so that we have
only the `BucketCacheProtos.BucketCacheEntry` at the beginning followed by all
`BucketCacheProtos.BackingMap` chunks , then we should change the naming here,
`firstChunk` should now be `entry`. And these `parseFirstChunk`,
`parseChunkPB` are not really parsing anything (parsing is delegated to the
proto utils), but rather updating the cache index structures, so we should
rename it to something like `updateCacheIndex`.
Also looking at `parseFirstChunck` and `parseChunk`, we should replace
duplicate code inside `parseFirstChunck` by call to `parseChunk`. Or since
`parseFirstChunk` is only used here, we could just get rid of it and simply do:
`fullyCachedFiles.clear();
fullyCachedFiles.putAll(BucketProtoUtils.fromPB(entry.getCachedFilesMap()));
`
Whilst the `backingMap` and `blocksByHfile` would get updated all in the
loop.
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java:
##########
@@ -62,42 +62,55 @@ static BucketCacheProtos.BucketCacheEntry toPB(BucketCache
cache,
.build();
}
- public static void serializeAsPB(BucketCache cache, FileOutputStream fos,
long chunkSize,
- long numChunks) throws IOException {
+ public static void serializeAsPB(BucketCache cache, FileOutputStream fos,
long chunkSize)
+ throws IOException {
+ // Write the new version of magic number.
+ fos.write(PB_MAGIC_V2);
+
int blockCount = 0;
- int chunkCount = 0;
int backingMapSize = cache.backingMap.size();
BucketCacheProtos.BackingMap.Builder builder =
BucketCacheProtos.BackingMap.newBuilder();
-
- fos.write(PB_MAGIC_V2);
- fos.write(Bytes.toBytes(chunkSize));
- fos.write(Bytes.toBytes(numChunks));
-
BucketCacheProtos.BackingMapEntry.Builder entryBuilder =
BucketCacheProtos.BackingMapEntry.newBuilder();
- for (Map.Entry<BlockCacheKey, BucketEntry> entry :
cache.backingMap.entrySet()) {
- blockCount++;
- entryBuilder.clear();
- entryBuilder.setKey(BucketProtoUtils.toPB(entry.getKey()));
- entryBuilder.setValue(BucketProtoUtils.toPB(entry.getValue()));
- builder.addEntry(entryBuilder.build());
+ Iterator<Map.Entry<BlockCacheKey, BucketEntry>> entrySetIter =
+ cache.backingMap.entrySet().iterator();
+ // Create the first chunk and persist all details along with it.
+ while (entrySetIter.hasNext()) {
+ blockCount++;
+ Map.Entry<BlockCacheKey, BucketEntry> entry = entrySetIter.next();
+ addToBuilder(entry, entryBuilder, builder);
if (blockCount % chunkSize == 0 || (blockCount == backingMapSize)) {
- chunkCount++;
- if (chunkCount == 1) {
- // Persist all details along with the first chunk into
BucketCacheEntry
BucketProtoUtils.toPB(cache, builder.build()).writeDelimitedTo(fos);
- } else {
- // Directly persist subsequent backing-map chunks.
+ break;
+ }
+ }
+ builder.clear();
Review Comment:
Why do we need two while loops? just do the `BucketProtoUtils.toPB(cache,
builder.build()).writeDelimitedTo(fos);` before the loop. That would write all
the `BucketCacheEntry` meta info at the beginning of the file with an empty
backingMap at this point, but that's fine as we'll write all backingMap chunks
subsequently and this code would be cleaner, I suppose.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]