Repository: sentry Updated Branches: refs/heads/master 6cf5efdb9 -> 1e29fa981
SENTRY-1907 Potential memory optimization when handling big full snapshots. (Alex Kolbasov, reviewed by Misha Dmitriev, Sergio Pena, Na Li) Project: http://git-wip-us.apache.org/repos/asf/sentry/repo Commit: http://git-wip-us.apache.org/repos/asf/sentry/commit/1e29fa98 Tree: http://git-wip-us.apache.org/repos/asf/sentry/tree/1e29fa98 Diff: http://git-wip-us.apache.org/repos/asf/sentry/diff/1e29fa98 Branch: refs/heads/master Commit: 1e29fa981c1e1a6477f0f2400b6d9b77283d8731 Parents: 6cf5efd Author: Alexander Kolbasov <[email protected]> Authored: Wed Aug 30 10:19:21 2017 -0700 Committer: Alexander Kolbasov <[email protected]> Committed: Wed Aug 30 10:19:21 2017 -0700 ---------------------------------------------------------------------- .../org/apache/sentry/hdfs/DBUpdateForwarder.java | 6 +++--- .../org/apache/sentry/hdfs/PathImageRetriever.java | 14 +++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sentry/blob/1e29fa98/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/DBUpdateForwarder.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/DBUpdateForwarder.java b/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/DBUpdateForwarder.java index 1ab4d6f..5d74421 100644 --- a/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/DBUpdateForwarder.java +++ b/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/DBUpdateForwarder.java @@ -83,7 +83,7 @@ class DBUpdateForwarder<K extends Updateable.Update> { return Collections.emptyList(); } else if (curImgNum > imgNum) { // In case a new HMS snapshot has been processed, then return a full paths image. - LOGGER.info("A newer full update is found with image number: ", curImgNum); + LOGGER.info("A newer full update is found with image number: {}", curImgNum); return Collections.singletonList(imageRetriever.retrieveFullImage()); } } @@ -105,14 +105,14 @@ class DBUpdateForwarder<K extends Updateable.Update> { if (seqNum > SEQUENCE_NUMBER_UPDATE_UNINITIALIZED && deltaRetriever.isDeltaAvailable(seqNum)) { List<K> deltas = deltaRetriever.retrieveDelta(seqNum); if (!deltas.isEmpty()) { - LOGGER.info("Newer delta updates are found up to sequence number: ", curSeqNum); + LOGGER.info("Newer delta updates are found up to sequence number: {}", curSeqNum); return deltas; } } // If the sequence number is < 0 or the requested delta is not available, then we // return a full update. - LOGGER.info("A full update is returned due to an unavailable sequence number: ", seqNum); + LOGGER.info("A full update is returned due to an unavailable sequence number: {}", seqNum); return Collections.singletonList(imageRetriever.retrieveFullImage()); } } http://git-wip-us.apache.org/repos/asf/sentry/blob/1e29fa98/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/PathImageRetriever.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/PathImageRetriever.java b/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/PathImageRetriever.java index f1e67ca..898c7be 100644 --- a/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/PathImageRetriever.java +++ b/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/PathImageRetriever.java @@ -18,14 +18,14 @@ package org.apache.sentry.hdfs; import com.codahale.metrics.Timer; -import com.google.common.base.Splitter; import com.google.common.collect.Lists; import org.apache.sentry.hdfs.service.thrift.TPathChanges; import org.apache.sentry.provider.db.service.persistent.PathsImage; import org.apache.sentry.provider.db.service.persistent.SentryStore; import javax.annotation.concurrent.ThreadSafe; - +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -69,7 +69,15 @@ class PathImageRetriever implements ImageRetriever<PathsUpdate> { TPathChanges pathChange = pathsUpdate.newPathChange(pathEnt.getKey()); for (String path : pathEnt.getValue()) { - pathChange.addToAddPaths(Lists.newArrayList(Splitter.on("/").split(path))); + // Convert each path to a list, so a/b/c becomes {a, b, c} + // Since these are partition names they may have a lot of duplicate strings. + // To save space for big snapshots we intern each path component. + String[] pathComponents = path.split("/"); + List<String> paths = new ArrayList<>(pathComponents.length); + for (String pathElement: pathComponents) { + paths.add(pathElement.intern()); + } + pathChange.addToAddPaths(paths); } }
