Repository: sentry Updated Branches: refs/heads/sentry-ha-redesign efb2a059a -> 3105a1bfb
SENTRY-1811: Optimize data structures used in HDFS sync (Misha Dmitriev, reviewed by Alex Kolbasov) Project: http://git-wip-us.apache.org/repos/asf/sentry/repo Commit: http://git-wip-us.apache.org/repos/asf/sentry/commit/3105a1bf Tree: http://git-wip-us.apache.org/repos/asf/sentry/tree/3105a1bf Diff: http://git-wip-us.apache.org/repos/asf/sentry/diff/3105a1bf Branch: refs/heads/sentry-ha-redesign Commit: 3105a1bfbf2134daa6394a09f9b380a2a521a79d Parents: efb2a05 Author: Alexander Kolbasov <[email protected]> Authored: Fri Jun 23 16:07:16 2017 -0700 Committer: Alexander Kolbasov <[email protected]> Committed: Fri Jun 23 16:07:16 2017 -0700 ---------------------------------------------------------------------- .../java/org/apache/sentry/hdfs/HMSPaths.java | 107 +++++++++++++------ .../org/apache/sentry/hdfs/HMSPathsDumper.java | 15 +-- .../org/apache/sentry/hdfs/TestHMSPaths.java | 16 +-- 3 files changed, 91 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sentry/blob/3105a1bf/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java index 3fb9b4e..6e71561 100644 --- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java +++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java @@ -133,12 +133,13 @@ public class HMSPaths implements AuthzPaths { private String pathElement; // A set of authorizable objects associated with this entry. Authorizable - // object should be case insensitive. - private Set<String> authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + // object should be case insensitive. The set is allocated lazily to avoid + // wasting memory due to empty sets. + private Set<String> authzObjs; - // Path of child element to the path entry mapping. - // e.g. 'b' -> '/a/b' - private final Map<String, Entry> children = new HashMap<>(); + // Path of child element to the path entry mapping, e.g. 'b' -> '/a/b' + // This is allocated lazily to avoid wasting memory due to empty maps. + private Map<String, Entry> children; /** * Construct an Entry with one authzObj. @@ -151,7 +152,7 @@ public class HMSPaths implements AuthzPaths { Entry(Entry parent, String pathElement, EntryType type, String authzObj) { this.parent = parent; this.type = type; - this.pathElement = pathElement; + this.pathElement = pathElement.intern(); addAuthzObj(authzObj); } @@ -165,33 +166,65 @@ public class HMSPaths implements AuthzPaths { Entry(Entry parent, String pathElement, EntryType type, Set<String> authzObjs) { this.parent = parent; this.type = type; - this.pathElement = pathElement; + this.pathElement = pathElement.intern(); addAuthzObjs(authzObjs); } - // Get all the mapping of the children element to - // the path entries. - Map<String, Entry> getChildren() { - return children; + Entry getChild(String pathElement) { + if (children == null) { + return null; + } + return children.get(pathElement); + } + + void putChild(String pathElement, Entry entry) { + if (children == null) { + // We allocate this map lazily and with small initial capacity to avoid + // memory waste due to empty and underpopulated maps. + children = new HashMap<>(2); + } + children.put(pathElement.intern(), entry); + } + + Entry removeChild(String pathElement) { + return children.remove(pathElement); + } + + boolean hasChildren() { return children != null && !children.isEmpty(); } + + int numChildren() { return children == null ? 0 : children.size(); } + + Collection<Entry> childrenValues() { + return children != null ? children.values() : Collections.<Entry>emptyList(); } void clearAuthzObjs() { - authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + authzObjs = null; } void removeAuthzObj(String authzObj) { - authzObjs.remove(authzObj); + if (authzObjs != null) { + authzObjs.remove(authzObj); + } } void addAuthzObj(String authzObj) { if (authzObj != null) { - authzObjs.add(authzObj); + if (authzObjs == null) { + authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + } + authzObjs.add(authzObj.intern()); } } void addAuthzObjs(Set<String> authzObjs) { if (authzObjs != null) { - this.authzObjs.addAll(authzObjs); + if (this.authzObjs == null) { + this.authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + } + for (String authzObj : authzObjs) { + this.authzObjs.add(authzObj.intern()); + } } } @@ -205,7 +238,7 @@ public class HMSPaths implements AuthzPaths { public String toString() { return String.format("Entry[fullPath: %s, type: %s, authObject: %s]", - getFullPath(), type, Joiner.on(",").join(authzObjs)); + getFullPath(), type, authzObjs != null ? Joiner.on(",").join(authzObjs) : ""); } @Override @@ -283,11 +316,11 @@ public class HMSPaths implements AuthzPaths { // The loop is resilient to 0 or 1 element list. for (int i = 0; i < pathElements.size() - 1; i++) { String elem = pathElements.get(i); - Entry child = parent.getChildren().get(elem); + Entry child = parent.getChild(elem); if (child == null) { child = new Entry(parent, elem, EntryType.DIR, (String) null); - parent.getChildren().put(elem, child); + parent.putChild(elem, child); } parent = child; @@ -311,7 +344,7 @@ public class HMSPaths implements AuthzPaths { Entry entryParent = createParent(pathElements); String lastPathElement = pathElements.get(pathElements.size() - 1); - Entry child = entryParent.getChildren().get(lastPathElement); + Entry child = entryParent.getChild(lastPathElement); // Create the child entry if not found. If found and the entry is // already a prefix or authzObj type, then only add the authzObj. @@ -319,7 +352,7 @@ public class HMSPaths implements AuthzPaths { // and add the authzObj. if (child == null) { child = new Entry(entryParent, lastPathElement, type, authzObj); - entryParent.getChildren().put(lastPathElement, child); + entryParent.putChild(lastPathElement, child); } else if (type == EntryType.AUTHZ_OBJECT && (child.getType() == EntryType.PREFIX || child.getType() == EntryType.AUTHZ_OBJECT)) { child.addAuthzObj(authzObj); @@ -373,7 +406,7 @@ public class HMSPaths implements AuthzPaths { */ private void deleteFromParent() { if (getParent() != null) { - getParent().getChildren().remove(getPathElement()); + getParent().removeChild(getPathElement()); getParent().deleteIfDangling(); parent = null; } @@ -381,13 +414,15 @@ public class HMSPaths implements AuthzPaths { public void deleteAuthzObject(String authzObj) { if (getParent() != null) { - if (getChildren().isEmpty()) { + if (!hasChildren()) { // Remove the authzObj on the path entry. If the path // entry no longer maps to any authzObj, removes the // entry recursively. - authzObjs.remove(authzObj); - if (authzObjs.isEmpty()) { + if (authzObjs != null) { + authzObjs.remove(authzObj); + } + if (authzObjs == null || authzObjs.isEmpty()) { deleteFromParent(); } } else { @@ -397,7 +432,9 @@ public class HMSPaths implements AuthzPaths { // the path entry. if (getType() == EntryType.AUTHZ_OBJECT) { setType(EntryType.DIR); - authzObjs.remove(authzObj); + if (authzObjs != null) { + authzObjs.remove(authzObj); + } } } } @@ -413,7 +450,7 @@ public class HMSPaths implements AuthzPaths { private void moveTo(Entry newParent, String pathElem) { Preconditions.checkNotNull(newParent); Preconditions.checkArgument(!pathElem.isEmpty()); - if (newParent.children.containsKey(pathElem)) { + if (newParent.getChild(pathElem) != null) { LOG.warn(String.format( "Attempt to move %s to %s: entry with the same name %s already exists", this.getFullPath(), newParent.getFullPath(), pathElem)); @@ -421,13 +458,13 @@ public class HMSPaths implements AuthzPaths { } deleteFromParent(); parent = newParent; - parent.children.put(pathElem, this); - pathElement = pathElem; + parent.putChild(pathElem, this); + pathElement = pathElem.intern(); } public void delete() { if (getParent() != null) { - if (getChildren().isEmpty()) { + if (!hasChildren()) { deleteFromParent(); } else { // if the entry was for an authz object and has children, we @@ -441,7 +478,7 @@ public class HMSPaths implements AuthzPaths { } private void deleteIfDangling() { - if (getChildren().isEmpty() && getType().isRemoveIfDangling()) { + if (!hasChildren() && getType().isRemoveIfDangling()) { delete(); } } @@ -458,8 +495,12 @@ public class HMSPaths implements AuthzPaths { return pathElement; } + /** + * @return the set of auth objects. The returned set should be used only + * for querying, not for any modifications. + */ public Set<String> getAuthzObjs() { - return authzObjs; + return authzObjs != null ? authzObjs : Collections.<String>emptySet(); } public Entry findPrefixEntry(List<String> pathElements) { @@ -474,7 +515,7 @@ public class HMSPaths implements AuthzPaths { if (index == pathElements.size()) { prefixEntry = null; } else { - Entry child = getChildren().get(pathElements.get(index)); + Entry child = getChild(pathElements.get(index)); if (child != null) { if (child.getType() == EntryType.PREFIX) { prefixEntry = child; @@ -501,7 +542,7 @@ public class HMSPaths implements AuthzPaths { found = this; } } else { - Entry child = getChildren().get(pathElements[index]); + Entry child = getChild(pathElements[index]); if (child != null) { if (index == pathElements.length - 1) { found = (child.getAuthzObjs().size() != 0) ? child : lastAuthObj; http://git-wip-us.apache.org/repos/asf/sentry/blob/3105a1bf/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java index 4e736ea..479188e 100644 --- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java +++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java @@ -17,6 +17,7 @@ */ package org.apache.sentry.hdfs; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -58,9 +59,9 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { private void cloneToTPathEntry(Entry parent, TPathEntry tParent, AtomicInteger counter, Map<Integer, TPathEntry> idMap) { - for (Entry child : parent.getChildren().values()) { + for (Entry child : parent.childrenValues()) { Tuple childTuple = createTPathEntry(child, counter, idMap); - tParent.getChildren().add(childTuple.id); + tParent.addToChildren(childTuple.id); cloneToTPathEntry(child, childTuple.entry, counter, idMap); } } @@ -68,9 +69,11 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { private Tuple createTPathEntry(Entry entry, AtomicInteger idCounter, Map<Integer, TPathEntry> idMap) { int myId = idCounter.incrementAndGet(); + Set<Integer> children = entry.hasChildren() ? + new HashSet<Integer>(entry.numChildren()) : Collections.<Integer>emptySet(); TPathEntry tEntry = new TPathEntry(entry.getType().getByte(), - entry.getPathElement(), new HashSet<Integer>()); - if (entry.getAuthzObjs().size() != 0) { + entry.getPathElement(), children); + if (!entry.getAuthzObjs().isEmpty()) { tEntry.setAuthzObjs(entry.getAuthzObjs()); } idMap.put(myId, tEntry); @@ -99,7 +102,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { Entry child = null; boolean isChildPrefix = hasCrossedPrefix; if (!hasCrossedPrefix) { - child = parent.getChildren().get(tChild.getPathElement()); + child = parent.getChild(tChild.getPathElement()); // If we havn't reached a prefix entry yet, then child should // already exists.. else it is not part of the prefix if (child == null) { @@ -126,7 +129,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { paths.add(child); } } - parent.getChildren().put(child.getPathElement(), child); + parent.putChild(child.getPathElement(), child); cloneToEntry(tChild, child, idMap, authzObjToPath, isChildPrefix); } } http://git-wip-us.apache.org/repos/asf/sentry/blob/3105a1bf/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java index f31ec21..a0d7bdc 100644 --- a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java +++ b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java @@ -65,7 +65,7 @@ public class TestHMSPaths { Assert.assertEquals(HMSPaths.EntryType.DIR, root.getType()); Assert.assertTrue(root.getAuthzObjs().size() == 0); Assert.assertEquals(Path.SEPARATOR, root.getFullPath()); - Assert.assertTrue(root.getChildren().isEmpty()); + Assert.assertFalse(root.hasChildren()); root.delete(); try { root.find(null, true); @@ -122,14 +122,14 @@ public class TestHMSPaths { HMSPaths.Entry entry = root.createPrefix(Lists.newArrayList("a")); entry.toString(); - Assert.assertEquals(1, root.getChildren().size()); + Assert.assertEquals(1, root.childrenValues().size()); Assert.assertEquals(root, entry.getParent()); Assert.assertEquals(HMSPaths.EntryType.PREFIX, entry.getType()); Assert.assertEquals("a", entry.getPathElement()); Assert.assertEquals(0, entry.getAuthzObjs().size()); Assert.assertEquals(Path.SEPARATOR + "a", entry.getFullPath()); - Assert.assertTrue(entry.getChildren().isEmpty()); + Assert.assertFalse(entry.hasChildren()); Assert.assertEquals(entry, root.findPrefixEntry(Lists.newArrayList("a"))); Assert.assertEquals(entry, root.findPrefixEntry(Lists.newArrayList("a", "b"))); @@ -154,7 +154,7 @@ public class TestHMSPaths { } entry.delete(); - Assert.assertTrue(root.getChildren().isEmpty()); + Assert.assertFalse(root.hasChildren()); } @Test @@ -163,7 +163,7 @@ public class TestHMSPaths { HMSPaths.Entry entry = root.createPrefix(Lists.newArrayList("a", "b")); entry.toString(); - Assert.assertEquals(1, root.getChildren().size()); + Assert.assertEquals(1, root.childrenValues().size()); Assert.assertEquals(root, entry.getParent().getParent()); Assert.assertEquals(HMSPaths.EntryType.PREFIX, entry.getType()); @@ -176,8 +176,8 @@ public class TestHMSPaths { Assert.assertEquals(Path.SEPARATOR + "a" + Path.SEPARATOR + "b", entry.getFullPath()); Assert.assertEquals(Path.SEPARATOR + "a", entry.getParent().getFullPath()); - Assert.assertTrue(entry.getChildren().isEmpty()); - Assert.assertEquals(1, entry.getParent().getChildren().size()); + Assert.assertFalse(entry.hasChildren()); + Assert.assertEquals(1, entry.getParent().childrenValues().size()); Assert.assertEquals(entry, root.findPrefixEntry(Lists.newArrayList("a", "b"))); Assert.assertNull(root.findPrefixEntry(Lists.newArrayList("a"))); @@ -199,7 +199,7 @@ public class TestHMSPaths { } entry.delete(); - Assert.assertTrue(root.getChildren().isEmpty()); + Assert.assertFalse(root.hasChildren()); } @Test
