Author: mduerig
Date: Mon May 18 08:32:37 2015
New Revision: 1679959
URL: http://svn.apache.org/r1679959
Log:
OAK-2862: CompactionMap#compress() inefficient for large compaction maps
CompactionMap test and benchmark
Added:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java
Modified:
jackrabbit/oak/trunk/oak-commons/pom.xml
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java
jackrabbit/oak/trunk/oak-jcr/pom.xml
jackrabbit/oak/trunk/oak-parent/pom.xml
Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1679959&r1=1679958&r2=1679959&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-commons/pom.xml Mon May 18 08:32:37 2015
@@ -103,5 +103,9 @@
<artifactId>logback-classic</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ </dependency>
</dependencies>
</project>
Added:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java?rev=1679959&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java
(added)
+++
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/benchmark/MicroBenchmark.java
Mon May 18 08:32:37 2015
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.benchmark;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+
+
+/**
+ * Poor man's micro benchmark suite.
+ * <p>
+ * Implementations of {@link Benchmark} are executed by the {@link
#run(Benchmark)} method.
+ * Execution consists of a warm up phase followed by the actual benchmark run.
+ */
+public final class MicroBenchmark {
+ private MicroBenchmark() { }
+
+ /**
+ * Benchmark base class.
+ */
+ public abstract static class Benchmark {
+
+ /**
+     * The benchmark runner calls this method first and exactly once
for setting up
+     * the test fixture.
+ */
+ public void setup() throws Exception { }
+
+ /**
+     * The benchmark runner calls this method before every call to the
{@code run}
+     * method for setting up the scope of the subsequent call to {@code run}.
+ */
+ public void beforeRun() throws Exception { }
+
+ /**
+ * The benchmark runner calls this method a number of times to measure
its
+ * runtime performance.
+ */
+ public abstract void run() throws Exception;
+
+ /**
+     * The benchmark runner calls this method after every call to the
{@code run}
+     * method for tearing down the scope of the previous call to {@code
run}.
+ */
+ public void afterRun() throws Exception { }
+
+ /**
+ * The benchmark runner calls this method exactly once and only if the
benchmark
+ * did not result in an error. This default implementation tabulates
+ * the percentiles of the gathered test statistics.
+ */
+ public void result(DescriptiveStatistics statistics) {
+ System.out.println(this);
+ if (statistics.getN() > 0) {
+ System.out.format(
+ "%6s %6s %6s %6s %6s %6s%n",
+ "min", "10%", "50%", "90%", "max", "N");
+ System.out.format(
+ "%6.0f %6.0f %6.0f %6.0f %6.0f %6d%n",
+ statistics.getMin() / 1000000,
+ statistics.getPercentile(10.0) / 1000000,
+ statistics.getPercentile(50.0) / 1000000,
+ statistics.getPercentile(90.0) / 1000000,
+ statistics.getMax() / 1000000,
+ statistics.getN());
+ } else {
+ System.out.println("No results");
+ }
+ }
+
+ /**
+ * The benchmark runner calls this method last and exactly once for
tearing down
+ * the test fixture.
+ */
+ public void tearDown() throws Exception { }
+ }
+
+ /**
+     * Run a {@code benchmark}: sets up the fixture, performs a warm up
pass,
+     * then a measured pass whose statistics are reported via
{@code result}.
+     * @param benchmark  the benchmark to execute
+     * @throws Exception  if any benchmark callback throws
+ */
+ public static void run(Benchmark benchmark) throws Exception {
+ benchmark.setup();
+ try {
+ // Warm up
+ runTest(benchmark);
+
+ // Run the test
+ benchmark.result(runTest(benchmark));
+ } finally {
+ benchmark.tearDown();
+ }
+ }
+
+ private static DescriptiveStatistics runTest(Benchmark benchmark) throws
Exception {
+ final DescriptiveStatistics statistics = new DescriptiveStatistics();
+ long runtimeEnd = System.currentTimeMillis() +
TimeUnit.SECONDS.toMillis(60);
+ while (System.currentTimeMillis() < runtimeEnd) {
+ statistics.addValue(execute(benchmark));
+ }
+ return statistics;
+ }
+
+ private static double execute(Benchmark benchmark) throws Exception {
+ benchmark.beforeRun();
+ try {
+ long start = System.nanoTime();
+ benchmark.run();
+ return System.nanoTime() - start;
+ } finally {
+ benchmark.afterRun();
+ }
+ }
+
+}
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java?rev=1679959&r1=1679958&r2=1679959&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionMapTest.java
Mon May 18 08:32:37 2015
@@ -1,111 +1,308 @@
/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
package org.apache.jackrabbit.oak.plugins.segment;
+import static com.google.common.collect.Iterables.get;
+import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Maps.newHashMap;
+import static com.google.common.collect.Maps.newLinkedHashMap;
+import static com.google.common.collect.Sets.newHashSet;
+import static java.io.File.createTempFile;
import static junit.framework.Assert.assertTrue;
+import static org.apache.jackrabbit.oak.commons.benchmark.MicroBenchmark.run;
import static
org.apache.jackrabbit.oak.plugins.segment.Segment.MAX_SEGMENT_SIZE;
import static
org.apache.jackrabbit.oak.plugins.segment.Segment.RECORD_ALIGN_BITS;
+import static org.apache.jackrabbit.oak.plugins.segment.SegmentVersion.V_11;
+import static
org.apache.jackrabbit.oak.plugins.segment.file.FileStore.newFileStore;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assume.assumeTrue;
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
+import java.util.Set;
+import java.util.UUID;
-import org.apache.jackrabbit.oak.plugins.segment.memory.MemoryStore;
+import com.google.common.base.Stopwatch;
+import org.apache.jackrabbit.oak.commons.benchmark.MicroBenchmark.Benchmark;
+import org.junit.After;
+import org.junit.Before;
import org.junit.Test;
public class CompactionMapTest {
+ private static final boolean BENCH = Boolean.getBoolean("benchmark");
+ private static final int SEED = Integer.getInteger("SEED", new
Random().nextInt());
- public static void main(String[] args) {
- // check the memory use of really large mappings, 1M compacted
- // segments with 10 records each.
- Runtime runtime = Runtime.getRuntime();
+ private final Random rnd = new Random(SEED);
- System.gc();
- System.out.println((runtime.totalMemory() - runtime.freeMemory()) /
(1024 * 1024));
+ private File directory;
+ private SegmentStore segmentStore;
+
+ private CompactionMap map;
+ private Map<RecordId, RecordId> reference;
+
+ @Before
+ public void setup() throws IOException {
+ directory = createTempFile(CompactionMapTest.class.getSimpleName(),
"dir", new File("target"));
+ directory.delete();
+ directory.mkdir();
+
+ segmentStore = newFileStore(directory).create();
+ SegmentWriter writer = new SegmentWriter(segmentStore, getTracker(),
V_11);
+ map = new CompactionMap(100000, writer.getTracker());
+ reference = newLinkedHashMap();
+ }
+
+ @After
+ public void tearDown() {
+ segmentStore.close();
+ directory.delete();
+ }
+
+ private SegmentTracker getTracker() {
+ return segmentStore.getTracker();
+ }
+
+ /**
+ * Returns a new valid record offset, between {@code a} and {@code b},
+ * exclusive.
+ */
+ private static int newValidOffset(Random random, int a, int b) {
+ int p = (a >> RECORD_ALIGN_BITS) + 1;
+ int q = (b >> RECORD_ALIGN_BITS);
+ return (p + random.nextInt(q - p)) << RECORD_ALIGN_BITS;
+ }
+
+ private Map<RecordId, RecordId> randomMap(int maxSegments, int
maxEntriesPerSegment) {
+ Map<RecordId, RecordId> map = newHashMap();
+ int segments = rnd.nextInt(maxSegments);
+ for (int i = 0; i < segments; i++) {
+ SegmentId id = getTracker().newDataSegmentId();
+ int n = rnd.nextInt(maxEntriesPerSegment);
+ int offset = MAX_SEGMENT_SIZE;
+ for (int j = 0; j < n; j++) {
+ offset = newValidOffset(rnd, (n - j) << RECORD_ALIGN_BITS,
offset);
+ RecordId before = new RecordId(id, offset);
+ RecordId after = new RecordId(
+ getTracker().newDataSegmentId(),
+ newValidOffset(rnd, 0, MAX_SEGMENT_SIZE));
+ map.put(before, after);
+ }
+ }
+ return map;
+ }
+
+ private void addRandomEntries(int maxSegments, int maxEntriesPerSegment) {
+ for (Entry<RecordId, RecordId> tuple : randomMap(maxSegments,
maxEntriesPerSegment).entrySet()) {
+ reference.put(tuple.getKey(), tuple.getValue());
+ map.put(tuple.getKey(), tuple.getValue());
+ }
+ }
+
+ private void removeRandomEntries(int count) {
+ Set<SegmentId> remove = newHashSet();
+ for (int k = 0; k < count && !reference.isEmpty(); k++) {
+ int j = rnd.nextInt(reference.size());
+ remove.add(get(reference.keySet(), j).getSegmentId());
+ }
+
+ Set<UUID> removeUUIDs = newHashSet();
+ for (SegmentId sid : remove) {
+ removeUUIDs.add(new UUID(sid.getMostSignificantBits(),
sid.getLeastSignificantBits()));
+ Iterator<RecordId> it = reference.keySet().iterator();
+ while (it.hasNext()) {
+ if (sid.equals(it.next().getSegmentId())) {
+ it.remove();
+ }
+ }
+ }
+
+ map.compress(removeUUIDs);
+ }
+
+ private void checkMap() {
+ for (Entry<RecordId, RecordId> entry : reference.entrySet()) {
+ assertTrue("Failed with seed " + SEED,
+ map.wasCompactedTo(entry.getKey(), entry.getValue()));
+ assertFalse("Failed with seed " + SEED,
+ map.wasCompactedTo(entry.getValue(), entry.getKey()));
+ }
+ }
+
+ @Test
+ public void randomTest() {
+ int maxSegments = 10000;
+ int maxEntriesPerSegment = 10;
+
+ for (int k = 0; k < 10; k++) {
+ addRandomEntries(rnd.nextInt(maxSegments) + 1,
rnd.nextInt(maxEntriesPerSegment) + 1);
+ if (!reference.isEmpty()) {
+ removeRandomEntries(rnd.nextInt(reference.size()));
+ }
+ checkMap();
+ }
+ map.compress();
+ checkMap();
+ }
- SegmentTracker factory = new MemoryStore().getTracker();
- CompactionMap map = new CompactionMap(100000, factory);
+ @Test
+ public void benchLargeMap() {
+ assumeTrue(BENCH);
+
+ // check the memory use of really large mappings, 1M compacted
segments with 10 records each.
+ Runtime runtime = Runtime.getRuntime();
+ Stopwatch timer = Stopwatch.createStarted();
for (int i = 0; i < 1000000; i++) {
- if (i % 1000 == 0) {
+ if (i % 100000 == 0) {
System.gc();
- System.out.println(i + ": " + (runtime.totalMemory() -
runtime.freeMemory()) / (1024 * 1024) + "MB");
+ System.out.println(
+ i + ": " + (runtime.totalMemory() -
runtime.freeMemory()) /
+ (1024 * 1024) + "MB, " + timer.toString());
+ timer.reset();
+ timer.start();
}
- SegmentId sid = factory.newDataSegmentId();
+ SegmentId sid = getTracker().newDataSegmentId();
for (int j = 0; j < 10; j++) {
RecordId rid = new RecordId(sid, j << RECORD_ALIGN_BITS);
map.put(rid, rid);
}
}
-
map.compress();
+
System.gc();
- System.out.println("final: " + (runtime.totalMemory() -
runtime.freeMemory()) / (1024 * 1024) + "MB");
- System.out.println("Compaction map: " + map.getCompactionStats());
+ System.out.println(
+ "final: " + (runtime.totalMemory() - runtime.freeMemory()) /
+ (1024 * 1024) + "MB, " + timer.toString());
}
@Test
- public void testCompactionMap() {
- int maxSegments = 1000;
- int maxEntriesPerSegment = 10;
- int seed = new Random().nextInt();
- Random r = new Random(seed);
+ public void benchPut() throws Exception {
+ assumeTrue(BENCH);
- SegmentTracker factory = new MemoryStore().getTracker();
- CompactionMap map = new CompactionMap(r.nextInt(maxSegments / 2),
factory);
- Map<RecordId, RecordId> entries = newHashMap();
+ run(new PutBenchmark(0, 0));
+ run(new PutBenchmark(1000, 10));
+ run(new PutBenchmark(10000, 10));
+ run(new PutBenchmark(100000, 10));
+ run(new PutBenchmark(1000000, 10));
+ }
- int segments = r.nextInt(maxSegments);
- for (int i = 0; i < segments; i++) {
- SegmentId id = factory.newDataSegmentId();
- int n = r.nextInt(maxEntriesPerSegment);
- int offset = MAX_SEGMENT_SIZE;
- for (int j = 0; j < n; j++) {
- offset = newValidOffset(r, (n - j) << RECORD_ALIGN_BITS,
offset);
- RecordId before = new RecordId(id, offset);
- RecordId after = new RecordId(
- factory.newDataSegmentId(),
- newValidOffset(r, 0, MAX_SEGMENT_SIZE));
- entries.put(before, after);
- map.put(before, after);
- assertTrue("Failed with seed " + seed,
- map.wasCompactedTo(before, after));
- assertFalse("Failed with seed " + seed,
- map.wasCompactedTo(after, before));
+ @Test
+ public void benchGet() throws Exception {
+ assumeTrue(BENCH);
+
+ run(new GetBenchmark(1000, 10));
+ run(new GetBenchmark(10000, 10));
+ run(new GetBenchmark(100000, 10));
+ run(new GetBenchmark(1000000, 10));
+ }
+
+ private class PutBenchmark extends Benchmark {
+ private final int maxSegments;
+ private final int maxEntriesPerSegment;
+
+ private Map<RecordId, RecordId> putIds;
+
+ public PutBenchmark(int maxSegments, int maxEntriesPerSegment) {
+ this.maxSegments = maxSegments;
+ this.maxEntriesPerSegment = maxEntriesPerSegment;
+ }
+
+ @Override
+ public void setup() throws IOException {
+ if (maxSegments > 0) {
+ addRandomEntries(maxSegments, maxEntriesPerSegment);
}
}
- map.compress();
- for (Entry<RecordId, RecordId> entry : entries.entrySet()) {
- assertTrue("Failed with seed " + seed,
- map.wasCompactedTo(entry.getKey(), entry.getValue()));
- assertFalse("Failed with seed " + seed,
- map.wasCompactedTo(entry.getValue(), entry.getKey()));
+ @Override
+ public void beforeRun() throws Exception {
+ putIds = randomMap(1000, 10);
+ }
+
+ @Override
+ public void run() {
+ for (Entry<RecordId, RecordId> tuple : putIds.entrySet()) {
+ map.put(tuple.getKey(), tuple.getValue());
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "Put benchmark: maxSegments=" + maxSegments + ",
maxEntriesPerSegment=" + maxEntriesPerSegment;
}
}
- /**
- * Returns a new valid record offset, between {@code a} and {@code b},
- * exclusive.
- */
- private static int newValidOffset(Random random, int a, int b) {
- int p = (a >> RECORD_ALIGN_BITS) + 1;
- int q = (b >> RECORD_ALIGN_BITS);
- return (p + random.nextInt(q - p)) << RECORD_ALIGN_BITS;
+ private class GetBenchmark extends Benchmark {
+ private final int maxSegments;
+ private final int maxEntriesPerSegment;
+
+ private final List<RecordId> getCandidateIds = newArrayList();
+ private final List<RecordId> getIds = newArrayList();
+
+ public GetBenchmark(int maxSegments, int maxEntriesPerSegment) {
+ this.maxSegments = maxSegments;
+ this.maxEntriesPerSegment = maxEntriesPerSegment;
+ }
+
+ @Override
+ public void setup() throws IOException {
+ addRandomEntries(maxSegments, maxEntriesPerSegment);
+ map.compress();
+ for (RecordId recordId : reference.keySet()) {
+ if (rnd.nextInt(reference.size()) % 10000 == 0) {
+ getCandidateIds.add(recordId);
+ }
+ }
+ for (int k = 0; k < 10000; k++) {
+ getCandidateIds.add(new RecordId(
+ getTracker().newDataSegmentId(),
+ newValidOffset(rnd, 0, MAX_SEGMENT_SIZE)));
+ }
+ }
+
+ @Override
+ public void beforeRun() throws Exception {
+ for (int k = 0; k < 10000; k ++) {
+
getIds.add(getCandidateIds.get(rnd.nextInt(getCandidateIds.size())));
+ }
+ }
+
+ @Override
+ public void run() {
+ for (RecordId id : getIds) {
+ map.get(id);
+ }
+ }
+
+ @Override
+ public void afterRun() throws Exception {
+ getIds.clear();
+ }
+
+ @Override
+ public String toString() {
+ return "Get benchmark: maxSegments=" + maxSegments + ",
maxEntriesPerSegment=" + maxEntriesPerSegment;
+ }
}
+
}
Modified: jackrabbit/oak/trunk/oak-jcr/pom.xml
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/pom.xml?rev=1679959&r1=1679958&r2=1679959&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-jcr/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-jcr/pom.xml Mon May 18 08:32:37 2015
@@ -336,7 +336,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
- <version>3.2</version>
<scope>test</scope>
</dependency>
<dependency>
Modified: jackrabbit/oak/trunk/oak-parent/pom.xml
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-parent/pom.xml?rev=1679959&r1=1679958&r2=1679959&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-parent/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-parent/pom.xml Mon May 18 08:32:37 2015
@@ -440,6 +440,11 @@
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <version>3.2</version>
+ </dependency>
</dependencies>
</dependencyManagement>