Author: mduerig
Date: Thu Feb 12 14:00:43 2015
New Revision: 1659259

URL: http://svn.apache.org/r1659259
Log:
OAK-2504: oak-run debug should list a breakdown of space usage per record type
- Use long to record memory usages
- More space efficient way of remembering seen record ids

Added:
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/ShortSetTest.java
Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyser.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyserTest.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyser.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyser.java?rev=1659259&r1=1659258&r2=1659259&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyser.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyser.java
 Thu Feb 12 14:00:43 2015
@@ -20,7 +20,9 @@
 package org.apache.jackrabbit.oak.plugins.segment;
 
 import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.collect.Sets.newHashSet;
+import static com.google.common.collect.Maps.newHashMap;
+import static java.lang.System.arraycopy;
+import static java.util.Arrays.binarySearch;
 import static org.apache.commons.io.FileUtils.byteCountToDisplaySize;
 import static org.apache.jackrabbit.oak.api.Type.BINARY;
 import static org.apache.jackrabbit.oak.plugins.segment.ListRecord.LEVEL_SIZE;
@@ -32,7 +34,7 @@ import static org.apache.jackrabbit.oak.
 import static 
org.apache.jackrabbit.oak.plugins.segment.Template.ZERO_CHILD_NODES;
 
 import java.util.Formatter;
-import java.util.Set;
+import java.util.Map;
 
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
@@ -47,36 +49,34 @@ import org.apache.jackrabbit.oak.spi.sta
  * space from aligning records is not accounted for.
  */
 public class RecordUsageAnalyser {
-    private final Set<RecordId> seenIds = newHashSet();
+    private long mapSize;       // leaf and branch
+    private long listSize;      // list and bucket
+    private long valueSize;     // inlined values
+    private long templateSize;  // template
+    private long nodeSize;      // node
 
-    private int mapSize;       // leaf and branch
-    private int listSize;      // list and bucket
-    private int valueSize;     // inlined values
-    private int templateSize;  // template
-    private int nodeSize;      // node
-
-    public int getMapSize() {
+    public long getMapSize() {
         return mapSize;
     }
 
-    public int getListSize() {
+    public long getListSize() {
         return listSize;
     }
 
-    public int getValueSize() {
+    public long getValueSize() {
         return valueSize;
     }
 
-    public int getTemplateSize() {
+    public long getTemplateSize() {
         return templateSize;
     }
 
-    public int getNodeSize() {
+    public long getNodeSize() {
         return nodeSize;
     }
 
     public void analyseNode(RecordId nodeId) {
-        if (seenIds.add(nodeId)) {
+        if (notSeen(nodeId)) {
             Segment segment = nodeId.getSegment();
             int offset = nodeId.getOffset();
             RecordId templateId = segment.readRecordId(offset);
@@ -136,7 +136,7 @@ public class RecordUsageAnalyser {
     }
 
     private void analyseTemplate(RecordId templateId) {
-        if (seenIds.add(templateId)) {
+        if (notSeen(templateId)) {
             Segment segment = templateId.getSegment();
             int size = 0;
             int offset = templateId.getOffset();
@@ -180,7 +180,7 @@ public class RecordUsageAnalyser {
     }
 
     private void analyseMap(RecordId mapId, MapRecord map) {
-        if (seenIds.add(mapId)) {
+        if (notSeen(mapId)) {
             if (map.isDiff()) {
                 analyseDiff(mapId, map);
             } else if (map.isLeaf()) {
@@ -225,13 +225,13 @@ public class RecordUsageAnalyser {
     }
 
     private void analyseProperty(RecordId propertyId, PropertyTemplate 
template) {
-        if (!seenIds.contains(propertyId)) {
+        if (!contains(propertyId)) {
             Segment segment = propertyId.getSegment();
             int offset = propertyId.getOffset();
             Type<?> type = template.getType();
 
             if (type.isArray()) {
-                seenIds.add(propertyId);
+                notSeen(propertyId);
                 int size = segment.readInt(offset);
                 valueSize += 4;
 
@@ -259,7 +259,7 @@ public class RecordUsageAnalyser {
     }
 
     private void analyseBlob(RecordId blobId) {
-        if (seenIds.add(blobId)) {
+        if (notSeen(blobId)) {
             Segment segment = blobId.getSegment();
             int offset = blobId.getOffset();
             byte head = segment.readByte(offset);
@@ -289,7 +289,7 @@ public class RecordUsageAnalyser {
     }
 
     private void analyseString(RecordId stringId) {
-        if (seenIds.add(stringId)) {
+        if (notSeen(stringId)) {
             Segment segment = stringId.getSegment();
             int offset = stringId.getOffset();
 
@@ -310,7 +310,7 @@ public class RecordUsageAnalyser {
     }
 
     private void analyseList(RecordId listId, int size) {
-        if (seenIds.add(listId)) {
+        if (notSeen(listId)) {
             listSize += noOfListSlots(size) * RECORD_ID_BYTES;
         }
     }
@@ -328,4 +328,58 @@ public class RecordUsageAnalyser {
         }
     }
 
+    private final Map<String, ShortSet> seenIds = newHashMap();
+
+    private boolean notSeen(RecordId id) {
+        String segmentId = id.getSegmentId().toString();
+        ShortSet offsets = seenIds.get(segmentId);
+        if (offsets == null) {
+            offsets = new ShortSet();
+            seenIds.put(segmentId, offsets);
+        }
+        return offsets.add(crop(id.getOffset()));
+    }
+
+    private boolean contains(RecordId id) {
+        String segmentId = id.getSegmentId().toString();
+        ShortSet offsets = seenIds.get(segmentId);
+        return offsets != null && offsets.contains(crop(id.getOffset()));
+    }
+
+    private static short crop(int value) {
+        return (short) (value >> Segment.RECORD_ALIGN_BITS);
+    }
+
+    static class ShortSet {
+        short[] elements;
+
+        boolean add(short n) {
+            if (elements == null) {
+                elements = new short[1];
+                elements[0] = n;
+                return true;
+            } else {
+                int k = binarySearch(elements, n);
+                if (k < 0) {
+                    int l = -k - 1;
+                    short[] e = new short[elements.length + 1];
+                    arraycopy(elements, 0, e, 0, l);
+                    e[l] = n;
+                    int c = elements.length - l;
+                    if (c > 0) {
+                        arraycopy(elements, l, e, l + 1, c);
+                    }
+                    elements = e;
+                    return true;
+                } else {
+                    return false;
+                }
+            }
+        }
+
+        boolean contains(short n) {
+            return elements != null && binarySearch(elements, n) >= 0;
+        }
+    }
+
 }

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyserTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyserTest.java?rev=1659259&r1=1659258&r2=1659259&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyserTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/RecordUsageAnalyserTest.java
 Thu Feb 12 14:00:43 2015
@@ -262,7 +262,7 @@ public class RecordUsageAnalyserTest {
     }
 
     private static void assertSizes(RecordUsageAnalyser analyser,
-            int maps, int lists, int values, int templates, int nodes) {
+            long maps, long lists, long values, long templates, long nodes) {
         assertEquals("maps sizes mismatch", maps, analyser.getMapSize());
         assertEquals("lists sizes mismatch", lists, analyser.getListSize());
         assertEquals("value sizes mismatch", values, analyser.getValueSize());

Added: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/ShortSetTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/ShortSetTest.java?rev=1659259&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/ShortSetTest.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/ShortSetTest.java
 Thu Feb 12 14:00:43 2015
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Random;
+
+import org.apache.jackrabbit.oak.plugins.segment.RecordUsageAnalyser.ShortSet;
+import org.junit.Test;
+
+public class ShortSetTest {
+    private final ShortSet set = new ShortSet();
+
+    @Test
+    public void empty() {
+        for (short k = Short.MIN_VALUE; k < Short.MAX_VALUE; k++) {
+            assertFalse(set.contains(k));
+        }
+    }
+
+    @Test
+    public void addOne() {
+        set.add(s(42));
+        assertTrue(set.contains(s(42)));
+    }
+
+    @Test
+    public void addTwo() {
+        set.add(s(21));
+        set.add(s(42));
+        assertTrue(set.contains(s(21)));
+        assertTrue(set.contains(s(42)));
+    }
+
+    @Test
+    public void addTwoReverse() {
+        set.add(s(42));
+        set.add(s(21));
+        assertTrue(set.contains(s(21)));
+        assertTrue(set.contains(s(42)));
+    }
+
+    @Test
+    public void addFirst() {
+        short[] elements = new short[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 0};
+        addAndCheck(elements);
+    }
+
+    @Test
+    public void addLast() {
+        short[] elements = new short[]{8, 7, 6, 5, 4, 3, 2, 1, 0, 9};
+        addAndCheck(elements);
+    }
+
+    @Test
+    public void addMedian() {
+        short[] elements = new short[]{0, 1, 2, 3, 4, 6, 7, 8, 9, 5};
+        addAndCheck(elements);
+    }
+
+    @Test
+    public void addRandom() {
+        short[] elements = new short[8192];
+        Random rnd = new Random();
+        for (int k = 0; k < elements.length; k++) {
+            elements[k] = s(rnd.nextInt(1 + Short.MAX_VALUE - Short.MIN_VALUE) 
+ Short.MIN_VALUE);
+        }
+
+        addAndCheck(elements);
+    }
+
+    private void addAndCheck(short[] elements) {
+        for (short k : elements) {
+            set.add(k);
+        }
+        for (short k : elements) {
+            assertTrue(set.contains(k));
+        }
+    }
+
+    private static short s(int n) {
+        return (short) n;
+    }
+}


Reply via email to