Repository: accumulo
Updated Branches:
  refs/heads/1.7 6df97b890 -> 10aba2a98
  refs/heads/master 7c746a3e2 -> 1bd6fe0dd
ACCUMULO-3987 Utility for generating the nasty rfile Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/10aba2a9 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/10aba2a9 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/10aba2a9 Branch: refs/heads/1.7 Commit: 10aba2a98e36246d68441ff8e8b1ece8205c4afe Parents: 6df97b8 Author: Josh Elser <els...@apache.org> Authored: Wed Sep 9 20:14:21 2015 -0400 Committer: Josh Elser <els...@apache.org> Committed: Wed Sep 9 20:14:21 2015 -0400 ---------------------------------------------------------------------- .../accumulo/test/GenerateSequentialRFile.java | 82 ++++++++++++++++++++ .../test/BulkImportSequentialRowsIT.java | 36 ++++----- 2 files changed, 96 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/10aba2a9/test/src/main/java/org/apache/accumulo/test/GenerateSequentialRFile.java ---------------------------------------------------------------------- diff --git a/test/src/main/java/org/apache/accumulo/test/GenerateSequentialRFile.java b/test/src/main/java/org/apache/accumulo/test/GenerateSequentialRFile.java new file mode 100644 index 0000000..927dff9 --- /dev/null +++ b/test/src/main/java/org/apache/accumulo/test/GenerateSequentialRFile.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.accumulo.test; + +import java.nio.charset.StandardCharsets; + +import org.apache.accumulo.core.cli.Help; +import org.apache.accumulo.core.conf.DefaultConfiguration; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.file.FileOperations; +import org.apache.accumulo.core.file.FileSKVWriter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.io.Text; + +import com.beust.jcommander.Parameter; + +/** + * Simple tool to reproduce the file that caused problems in ACCUMULO-3967 + */ +public class GenerateSequentialRFile implements Runnable { + private static final Text CF = new Text("CF"); + private static final Text CQ = new Text("CQ"); + + private final Opts opts; + + public GenerateSequentialRFile(Opts opts) { + this.opts = opts; + } + + static class Opts extends Help { + @Parameter(names = {"-f", "--file"}, description = "Path to the file to create") + String filePath; + @Parameter(names = {"-nr"}, description = "Number of rows") + long rows = 24; + @Parameter(names = {"-nv"}, description = "Number of values per row") + long valuesPerRow = 42000; + } + + public void run() { + try { + final Configuration conf = new Configuration(); + final FileSystem fs = FileSystem.getLocal(conf); + FileSKVWriter writer = FileOperations.getInstance().openWriter(opts.filePath, fs, conf, DefaultConfiguration.getInstance()); + + writer.startDefaultLocalityGroup(); + + for (int x = 0; x < opts.rows; x++) { + final Text row 
= new Text(String.format("%03d", x)); + for (int y = 0; y < opts.valuesPerRow; y++) { + final String suffix = String.format("%05d", y); + writer.append(new Key(new Text(row + ":" + suffix), CF, CQ), new Value(suffix.getBytes(StandardCharsets.UTF_8))); + } + } + + writer.close(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public static void main(String[] args) throws Exception { + Opts opts = new Opts(); + opts.parseArgs(GenerateSequentialRFile.class.getName(), args); + new GenerateSequentialRFile(opts).run(); + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/10aba2a9/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java ---------------------------------------------------------------------- diff --git a/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java b/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java index 3fee282..a1d6f4b 100644 --- a/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java +++ b/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java @@ -19,15 +19,9 @@ package org.apache.accumulo.test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import java.util.SortedSet; import java.util.TreeSet; import org.apache.accumulo.core.client.admin.TableOperations; -import org.apache.accumulo.core.conf.DefaultConfiguration; -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.file.FileOperations; -import org.apache.accumulo.core.file.FileSKVWriter; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.harness.AccumuloClusterIT; import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl; @@ -46,6 +40,9 @@ import com.google.common.collect.Iterables; public class BulkImportSequentialRowsIT extends AccumuloClusterIT { private static final Logger log = 
LoggerFactory.getLogger(BulkImportSequentialRowsIT.class); + private static final long NR = 24; + private static final long NV = 42000; + @Override public int defaultTimeoutSeconds() { return 60; @@ -81,32 +78,27 @@ public class BulkImportSequentialRowsIT extends AccumuloClusterIT { assertTrue(fs.mkdirs(err)); Path rfile = new Path(bulk, "file.rf"); - FileSKVWriter writer = FileOperations.getInstance().openWriter(rfile.toString(), fs, new Configuration(), DefaultConfiguration.getInstance()); - writer.startDefaultLocalityGroup(); - - final Value emptyValue = new Value(new byte[0]); - final SortedSet<Text> splits = new TreeSet<Text>(); - for (int i = 0; i < 100; i++) { - String row = String.format("%03d", i); - splits.add(new Text(row)); - writer.append(new Key(row, "", ""), emptyValue); - for (int j = 0; j < 100; j++) { - writer.append(new Key(row, "", String.format("%03d", j)), emptyValue); - } - } - writer.close(); + + GenerateSequentialRFile.main(new String[] {"-f", rfile.toString(), "-nr", Long.toString(NR), "-nv", Long.toString(NV)}); assertTrue(fs.exists(rfile)); // Add some splits - to.addSplits(tableName, splits); + to.addSplits(tableName, getSplits()); // Then import a single rfile to all the tablets, hoping that we get a failure to import because of the balancer moving tablets around // and then we get to verify that the bug is actually fixed. to.importDirectory(tableName, bulk.toString(), err.toString(), false); // The bug is that some tablets don't get imported into. - assertEquals(10100, Iterables.size(getConnector().createScanner(tableName, Authorizations.EMPTY))); + assertEquals(NR * NV, Iterables.size(getConnector().createScanner(tableName, Authorizations.EMPTY))); } + private TreeSet<Text> getSplits() { + TreeSet<Text> splits = new TreeSet<>(); + for (int i = 0; i < NR; i++) { + splits.add(new Text(String.format("%03d", i))); + } + return splits; + } }