Author: mduerig
Date: Mon May 18 08:32:37 2015
New Revision: 1679959

URL: http://svn.apache.org/r1679959
Log:
OAK-2862: CompactionMap#compress() inefficient for large compaction maps
CompactionMap test and benchmark

Added:
    
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/
    
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java
Modified:
    jackrabbit/oak/trunk/oak-commons/pom.xml
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java
    jackrabbit/oak/trunk/oak-jcr/pom.xml
    jackrabbit/oak/trunk/oak-parent/pom.xml

Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1679959&r1=1679958&r2=1679959&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-commons/pom.xml Mon May 18 08:32:37 2015
@@ -103,5 +103,9 @@
       <artifactId>logback-classic</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-math3</artifactId>
+    </dependency>
   </dependencies>
 </project>

Added: 
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java?rev=1679959&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java
 Mon May 18 08:32:37 2015
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.benchmark;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+
+
+/**
+ * Poor man's micro benchmark suite.
+ * <p>
+ * Implementations of {@link Benchmark} are executed by the {@link 
#run(Benchmark)} method.
+ * Execution consists of a warm up phase followed by the actual benchmark run.
+ */
+public final class MicroBenchmark {
+    private MicroBenchmark() { }
+
+    /**
+     * Benchmark base class.
+     */
+    public abstract static class Benchmark {
+
+        /**
+         * The benchmark runner calls this method first and exactly once for setting up
+         * the test fixture.
+         */
+        public void setup() throws Exception { }
+
+        /**
+         * The benchmark runner calls this method before every call to the {@code run}
+         * method for setting the scope of the subsequent call to {@code run}.
+         */
+        public void beforeRun() throws Exception { }
+
+        /**
+         * The benchmark runner calls this method a number of times to measure 
its
+         * runtime performance.
+         */
+        public abstract void run() throws Exception;
+
+        /**
+         * The benchmark runner calls this method after every call to the {@code run}
+         * method for tearing down the scope of the previous call to {@code 
run}.
+         */
+        public void afterRun() throws Exception { }
+
+        /**
+         * The benchmark runner calls this method exactly once and only if the 
benchmark
+         * did not result in an error. This default implementation tabulates
+         * the percentiles of the gathered test statistics.
+         */
+        public void result(DescriptiveStatistics statistics) {
+            System.out.println(this);
+            if (statistics.getN() > 0) {
+                System.out.format(
+                        "%6s  %6s  %6s  %6s  %6s  %6s%n",
+                        "min", "10%", "50%", "90%", "max", "N");
+                System.out.format(
+                        "%6.0f  %6.0f  %6.0f  %6.0f  %6.0f  %6d%n",
+                        statistics.getMin() / 1000000,
+                        statistics.getPercentile(10.0) / 1000000,
+                        statistics.getPercentile(50.0) / 1000000,
+                        statistics.getPercentile(90.0) / 1000000,
+                        statistics.getMax() / 1000000,
+                        statistics.getN());
+            } else {
+                System.out.println("No results");
+            }
+        }
+
+        /**
+         * The benchmark runner calls this method last and exactly once for 
tearing down
+         * the test fixture.
+         */
+        public void tearDown() throws Exception { }
+    }
+
+    /**
+     * Run a {@code benchmark}
+     * @param benchmark
+     * @throws Exception
+     */
+    public static void run(Benchmark benchmark) throws Exception {
+        benchmark.setup();
+        try {
+            // Warm up
+            runTest(benchmark);
+
+            // Run the test
+            benchmark.result(runTest(benchmark));
+        } finally {
+            benchmark.tearDown();
+        }
+    }
+
+    private static DescriptiveStatistics runTest(Benchmark benchmark) throws 
Exception {
+        final DescriptiveStatistics statistics = new DescriptiveStatistics();
+        long runtimeEnd = System.currentTimeMillis() + 
TimeUnit.SECONDS.toMillis(60);
+        while (System.currentTimeMillis() < runtimeEnd) {
+            statistics.addValue(execute(benchmark));
+        }
+        return statistics;
+    }
+
+    private static double execute(Benchmark benchmark) throws Exception {
+        benchmark.beforeRun();
+        try {
+            long start = System.nanoTime();
+            benchmark.run();
+            return System.nanoTime() - start;
+        } finally {
+            benchmark.afterRun();
+        }
+    }
+
+}

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java?rev=1679959&r1=1679958&r2=1679959&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java
 Mon May 18 08:32:37 2015
@@ -1,111 +1,308 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 package org.apache.jackrabbit.oak.plugins.segment;
 
+import static com.google.common.collect.Iterables.get;
+import static com.google.common.collect.Lists.newArrayList;
 import static com.google.common.collect.Maps.newHashMap;
+import static com.google.common.collect.Maps.newLinkedHashMap;
+import static com.google.inject.internal.util.$Sets.newHashSet;
+import static java.io.File.createTempFile;
 import static junit.framework.Assert.assertTrue;
+import static org.apache.jackrabbit.oak.commons.benchmark.MicroBenchmark.run;
 import static 
org.apache.jackrabbit.oak.plugins.segment.Segment.MAX_SEGMENT_SIZE;
 import static 
org.apache.jackrabbit.oak.plugins.segment.Segment.RECORD_ALIGN_BITS;
+import static org.apache.jackrabbit.oak.plugins.segment.SegmentVersion.V_11;
+import static 
org.apache.jackrabbit.oak.plugins.segment.file.FileStore.newFileStore;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assume.assumeTrue;
 
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Random;
+import java.util.Set;
+import java.util.UUID;
 
-import org.apache.jackrabbit.oak.plugins.segment.memory.MemoryStore;
+import com.google.common.base.Stopwatch;
+import org.apache.jackrabbit.oak.commons.benchmark.MicroBenchmark.Benchmark;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 
 public class CompactionMapTest {
+    private static final boolean BENCH = Boolean.getBoolean("benchmark");
+    private static final int SEED = Integer.getInteger("SEED", new 
Random().nextInt());
 
-    public static void main(String[] args) {
-        // check the memory use of really large mappings, 1M compacted
-        // segments with 10 records each.
-        Runtime runtime = Runtime.getRuntime();
+    private final Random rnd = new Random(SEED);
 
-        System.gc();
-        System.out.println((runtime.totalMemory() - runtime.freeMemory()) / 
(1024 * 1024));
+    private File directory;
+    private SegmentStore segmentStore;
+
+    private CompactionMap map;
+    private Map<RecordId, RecordId> reference;
+
+    @Before
+    public void setup() throws IOException {
+        directory = createTempFile(CompactionMapTest.class.getSimpleName(), 
"dir", new File("target"));
+        directory.delete();
+        directory.mkdir();
+
+        segmentStore = newFileStore(directory).create();
+        SegmentWriter writer = new SegmentWriter(segmentStore, getTracker(), 
V_11);
+        map = new CompactionMap(100000, writer.getTracker());
+        reference = newLinkedHashMap();
+    }
+
+    @After
+    public void tearDown() {
+        segmentStore.close();
+        directory.delete();
+    }
+
+    private SegmentTracker getTracker() {
+        return segmentStore.getTracker();
+    }
+
+    /**
+     * Returns a new valid record offset, between {@code a} and {@code b},
+     * exclusive.
+     */
+    private static int newValidOffset(Random random, int a, int b) {
+        int p = (a >> RECORD_ALIGN_BITS) + 1;
+        int q = (b >> RECORD_ALIGN_BITS);
+        return (p + random.nextInt(q - p)) << RECORD_ALIGN_BITS;
+    }
+
+    private Map<RecordId, RecordId> randomMap(int maxSegments, int 
maxEntriesPerSegment) {
+        Map<RecordId, RecordId> map = newHashMap();
+        int segments = rnd.nextInt(maxSegments);
+        for (int i = 0; i < segments; i++) {
+            SegmentId id = getTracker().newDataSegmentId();
+            int n = rnd.nextInt(maxEntriesPerSegment);
+            int offset = MAX_SEGMENT_SIZE;
+            for (int j = 0; j < n; j++) {
+                offset = newValidOffset(rnd, (n - j) << RECORD_ALIGN_BITS, 
offset);
+                RecordId before = new RecordId(id, offset);
+                RecordId after = new RecordId(
+                        getTracker().newDataSegmentId(),
+                        newValidOffset(rnd, 0, MAX_SEGMENT_SIZE));
+                map.put(before, after);
+            }
+        }
+        return map;
+    }
+
+    private void addRandomEntries(int maxSegments, int maxEntriesPerSegment) {
+        for (Entry<RecordId, RecordId> tuple : randomMap(maxSegments, 
maxEntriesPerSegment).entrySet()) {
+            reference.put(tuple.getKey(), tuple.getValue());
+            map.put(tuple.getKey(), tuple.getValue());
+        }
+    }
+
+    private void removeRandomEntries(int count) {
+        Set<SegmentId> remove = newHashSet();
+        for (int k = 0; k < count && !reference.isEmpty(); k++) {
+            int j = rnd.nextInt(reference.size());
+            remove.add(get(reference.keySet(), j).getSegmentId());
+        }
+
+        Set<UUID> removeUUIDs = newHashSet();
+        for (SegmentId sid : remove) {
+            removeUUIDs.add(new UUID(sid.getMostSignificantBits(), 
sid.getLeastSignificantBits()));
+            Iterator<RecordId> it = reference.keySet().iterator();
+            while (it.hasNext()) {
+                if (sid.equals(it.next().getSegmentId())) {
+                    it.remove();
+                }
+            }
+        }
+
+        map.compress(removeUUIDs);
+    }
+
+    private void checkMap() {
+        for (Entry<RecordId, RecordId> entry : reference.entrySet()) {
+            assertTrue("Failed with seed " + SEED,
+                    map.wasCompactedTo(entry.getKey(), entry.getValue()));
+            assertFalse("Failed with seed " + SEED,
+                    map.wasCompactedTo(entry.getValue(), entry.getKey()));
+        }
+    }
+
+    @Test
+    public void randomTest() {
+        int maxSegments = 10000;
+        int maxEntriesPerSegment = 10;
+
+        for (int k = 0; k < 10; k++) {
+            addRandomEntries(rnd.nextInt(maxSegments) + 1, 
rnd.nextInt(maxEntriesPerSegment) + 1);
+            if (!reference.isEmpty()) {
+                removeRandomEntries(rnd.nextInt(reference.size()));
+            }
+            checkMap();
+        }
+        map.compress();
+        checkMap();
+    }
 
-        SegmentTracker factory = new MemoryStore().getTracker();
-        CompactionMap map = new CompactionMap(100000, factory);
+    @Test
+    public void benchLargeMap() {
+        assumeTrue(BENCH);
+
+        // check the memory use of really large mappings, 1M compacted 
segments with 10 records each.
+        Runtime runtime = Runtime.getRuntime();
+        Stopwatch timer = Stopwatch.createStarted();
         for (int i = 0; i < 1000000; i++) {
-            if (i % 1000 == 0) {
+            if (i % 100000 == 0) {
                 System.gc();
-                System.out.println(i + ": " + (runtime.totalMemory() - 
runtime.freeMemory()) / (1024 * 1024) + "MB");
+                System.out.println(
+                        i + ": " + (runtime.totalMemory() - 
runtime.freeMemory()) /
+                                (1024 * 1024) + "MB, " + timer.toString());
+                timer.reset();
+                timer.start();
             }
-            SegmentId sid = factory.newDataSegmentId();
+            SegmentId sid = getTracker().newDataSegmentId();
             for (int j = 0; j < 10; j++) {
                 RecordId rid = new RecordId(sid, j << RECORD_ALIGN_BITS);
                 map.put(rid, rid);
             }
         }
-
         map.compress();
+
         System.gc();
-        System.out.println("final: " + (runtime.totalMemory() - 
runtime.freeMemory()) / (1024 * 1024) + "MB");
-        System.out.println("Compaction map: " + map.getCompactionStats());
+        System.out.println(
+                "final: " + (runtime.totalMemory() - runtime.freeMemory()) /
+                        (1024 * 1024) + "MB, " + timer.toString());
     }
 
     @Test
-    public void testCompactionMap() {
-        int maxSegments = 1000;
-        int maxEntriesPerSegment = 10;
-        int seed = new Random().nextInt();
-        Random r = new Random(seed);
+    public void benchPut() throws Exception {
+        assumeTrue(BENCH);
 
-        SegmentTracker factory = new MemoryStore().getTracker();
-        CompactionMap map = new CompactionMap(r.nextInt(maxSegments / 2), 
factory);
-        Map<RecordId, RecordId> entries = newHashMap();
+        run(new PutBenchmark(0, 0));
+        run(new PutBenchmark(1000, 10));
+        run(new PutBenchmark(10000, 10));
+        run(new PutBenchmark(100000, 10));
+        run(new PutBenchmark(1000000, 10));
+    }
 
-        int segments = r.nextInt(maxSegments);
-        for (int i = 0; i < segments; i++) {
-            SegmentId id = factory.newDataSegmentId();
-            int n = r.nextInt(maxEntriesPerSegment);
-            int offset = MAX_SEGMENT_SIZE;
-            for (int j = 0; j < n; j++) {
-                offset = newValidOffset(r, (n - j) << RECORD_ALIGN_BITS, 
offset);
-                RecordId before = new RecordId(id, offset);
-                RecordId after = new RecordId(
-                        factory.newDataSegmentId(),
-                        newValidOffset(r, 0, MAX_SEGMENT_SIZE));
-                entries.put(before, after);
-                map.put(before, after);
-                assertTrue("Failed with seed " + seed,
-                        map.wasCompactedTo(before, after));
-                assertFalse("Failed with seed " + seed,
-                        map.wasCompactedTo(after, before));
+    @Test
+    public void benchGet() throws Exception {
+        assumeTrue(BENCH);
+
+        run(new GetBenchmark(1000, 10));
+        run(new GetBenchmark(10000, 10));
+        run(new GetBenchmark(100000, 10));
+        run(new GetBenchmark(1000000, 10));
+    }
+
+    private class PutBenchmark extends Benchmark {
+        private final int maxSegments;
+        private final int maxEntriesPerSegment;
+
+        private Map<RecordId, RecordId> putIds;
+
+        public PutBenchmark(int maxSegments, int maxEntriesPerSegment) {
+            this.maxSegments = maxSegments;
+            this.maxEntriesPerSegment = maxEntriesPerSegment;
+        }
+
+        @Override
+        public void setup() throws IOException {
+            if (maxSegments > 0) {
+                addRandomEntries(maxSegments, maxEntriesPerSegment);
             }
         }
-        map.compress();
 
-        for (Entry<RecordId, RecordId> entry : entries.entrySet()) {
-            assertTrue("Failed with seed " + seed,
-                    map.wasCompactedTo(entry.getKey(), entry.getValue()));
-            assertFalse("Failed with seed " + seed,
-                    map.wasCompactedTo(entry.getValue(), entry.getKey()));
+        @Override
+        public void beforeRun() throws Exception {
+            putIds = randomMap(1000, 10);
+        }
+
+        @Override
+        public void run() {
+            for (Entry<RecordId, RecordId> tuple : putIds.entrySet()) {
+                map.put(tuple.getKey(), tuple.getValue());
+            }
+        }
+
+        @Override
+        public String toString() {
+            return "Put benchmark: maxSegments=" + maxSegments + ", 
maxEntriesPerSegment=" + maxEntriesPerSegment;
         }
     }
 
-    /**
-     * Returns a new valid record offset, between {@code a} and {@code b},
-     * exclusive.
-     */
-    private static int newValidOffset(Random random, int a, int b) {
-        int p = (a >> RECORD_ALIGN_BITS) + 1;
-        int q = (b >> RECORD_ALIGN_BITS);
-        return (p + random.nextInt(q - p)) << RECORD_ALIGN_BITS;
+    private class GetBenchmark extends Benchmark {
+        private final int maxSegments;
+        private final int maxEntriesPerSegment;
+
+        private final List<RecordId> getCandidateIds = newArrayList();
+        private final List<RecordId> getIds = newArrayList();
+
+        public GetBenchmark(int maxSegments, int maxEntriesPerSegment) {
+            this.maxSegments = maxSegments;
+            this.maxEntriesPerSegment = maxEntriesPerSegment;
+        }
+
+        @Override
+        public void setup() throws IOException {
+            addRandomEntries(maxSegments, maxEntriesPerSegment);
+            map.compress();
+            for (RecordId recordId : reference.keySet()) {
+                if (rnd.nextInt(reference.size()) % 10000 == 0) {
+                    getCandidateIds.add(recordId);
+                }
+            }
+            for (int k = 0; k < 10000; k++) {
+                getCandidateIds.add(new RecordId(
+                        getTracker().newDataSegmentId(),
+                        newValidOffset(rnd, 0, MAX_SEGMENT_SIZE)));
+            }
+        }
+
+        @Override
+        public void beforeRun() throws Exception {
+            for (int k = 0; k < 10000; k ++) {
+                
getIds.add(getCandidateIds.get(rnd.nextInt(getCandidateIds.size())));
+            }
+        }
+
+        @Override
+        public void run() {
+            for (RecordId id : getIds) {
+                map.get(id);
+            }
+        }
+
+        @Override
+        public void afterRun() throws Exception {
+            getIds.clear();
+        }
+
+        @Override
+        public String toString() {
+            return "Get benchmark: maxSegments=" + maxSegments + ", 
maxEntriesPerSegment=" + maxEntriesPerSegment;
+        }
     }
+
 }

Modified: jackrabbit/oak/trunk/oak-jcr/pom.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/pom.xml?rev=1679959&r1=1679958&r2=1679959&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-jcr/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-jcr/pom.xml Mon May 18 08:32:37 2015
@@ -336,7 +336,6 @@
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-math3</artifactId>
-      <version>3.2</version>
       <scope>test</scope>
     </dependency>
     <dependency>

Modified: jackrabbit/oak/trunk/oak-parent/pom.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-parent/pom.xml?rev=1679959&r1=1679958&r2=1679959&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-parent/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-parent/pom.xml Mon May 18 08:32:37 2015
@@ -440,6 +440,11 @@
         <artifactId>commons-lang</artifactId>
         <version>2.6</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-math3</artifactId>
+        <version>3.2</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
 


Reply via email to