[accumulo] 01/01: Merge branch '1.8'

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 26e83f05d1448631f0b8a0da1b8671abe9beb922
Merge: 345070d 06cb5ed
Author: Keith Turner 
AuthorDate: Thu Nov 30 21:08:53 2017 -0500

Merge branch '1.8'

 .../org/apache/accumulo/core/file/rfile/RFile.java |   9 +-
 .../accumulo/core/file/rfile/RollingStats.java | 114 
 .../core/file/rfile/RolllingStatsTest.java | 204 +
 3 files changed, 323 insertions(+), 4 deletions(-)

diff --cc core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index c399a22,cda246a..f2e2463
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@@ -403,8 -402,9 +402,9 @@@ public class RFile 
  
  private SampleLocalityGroupWriter sample;
  
- private SummaryStatistics keyLenStats = new SummaryStatistics();
+ // Use windowed stats to fix ACCUMULO-4669
+ private RollingStats keyLenStats = new RollingStats(2017);
 -private double avergageKeySize = 0;
 +private double averageKeySize = 0;
  
  LocalityGroupWriter(BlockFileWriter fileWriter, long blockSize, long 
maxBlockSize, LocalityGroupMetadata currentLocalityGroup,
  SampleLocalityGroupWriter sample) {
diff --cc 
core/src/test/java/org/apache/accumulo/core/file/rfile/RolllingStatsTest.java
index 000,4f8fcd1..19f9c5c
mode 00,100644..100644
--- 
a/core/src/test/java/org/apache/accumulo/core/file/rfile/RolllingStatsTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/file/rfile/RolllingStatsTest.java
@@@ -1,0 -1,205 +1,204 @@@
+ /*
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+ 
+ package org.apache.accumulo.core.file.rfile;
+ 
+ import java.util.Random;
++import java.util.function.IntSupplier;
+ 
+ import org.apache.commons.math3.distribution.NormalDistribution;
+ import org.apache.commons.math3.distribution.ZipfDistribution;
+ import org.apache.commons.math3.random.Well19937c;
+ import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+ import org.junit.Assert;
+ import org.junit.Test;
+ 
+ import com.google.common.math.DoubleMath;
+ 
+ public class RolllingStatsTest {
+ 
+   private static final double TOLERANCE = 1.0 / 1000;
+ 
+   private static void assertFuzzyEquals(double expected, double actual) {
+ Assert.assertTrue(String.format("expected: %f, actual: %f diff: %f", 
expected, actual, Math.abs(expected - actual)),
+ DoubleMath.fuzzyEquals(expected, actual, TOLERANCE));
+   }
+ 
+   private static void checkAgreement(DescriptiveStatistics ds, RollingStats 
rs) {
+ // getting stats from ds is expensive, so do it once... otherwise unit 
test takes 11 sec
+ // instead of 5 secs
+ double expMean = ds.getMean();
+ double expVar = ds.getVariance();
+ double expStdDev = Math.sqrt(expVar);
+ 
+ assertFuzzyEquals(expMean, rs.getMean());
+ assertFuzzyEquals(expVar, rs.getVariance());
+ assertFuzzyEquals(expStdDev, rs.getStandardDeviation());
+ 
+ Assert.assertTrue(expMean >= 0);
+ Assert.assertTrue(rs.getMean() >= 0);
+ Assert.assertTrue(expVar >= 0);
+ Assert.assertTrue(rs.getVariance() >= 0);
+ Assert.assertTrue(expStdDev >= 0);
+ Assert.assertTrue(rs.getStandardDeviation() >= 0);
+   }
+ 
+   private static class StatTester {
+ 
+ Random rand = new Random(42);
+ private DescriptiveStatistics ds;
+ private RollingStats rs;
+ private RollingStats rsp;
+ 
+ StatTester(int windowSize) {
+   ds = new DescriptiveStatistics();
+   ds.setWindowSize(windowSize);
+ 
+   rs = new RollingStats(windowSize);
+   rsp = new RollingStats(windowSize);
+ }
+ 
+ void addValue(long v) {
+   ds.addValue(v);
+   rs.addValue(v);
+   rsp.addValue(v);
+   checkAgreement(ds, rs);
+ 
+   if (rand.nextDouble() < 0.001) {
+ checkAgreement(ds, rsp);
+   }
+ }
+ 
+ void check() {
+   checkAgreement(ds, rsp);
+ }
+   }
+ 
+   @Test
+   public void testFewSizes() {
+ StatTester st 

[accumulo] branch master updated (345070d -> 26e83f0)

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git.


from 345070d  Merge branch '1.8'
 add c28e11c  ACCUMULO-4669 Use windowed statistics in RFile
 add 06cb5ed  Merge branch '1.7' into 1.8
 new 26e83f0  Merge branch '1.8'

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/accumulo/core/file/rfile/RFile.java |   9 +-
 .../accumulo/core/file/rfile/RollingStats.java | 114 
 .../core/file/rfile/RolllingStatsTest.java | 204 +
 3 files changed, 323 insertions(+), 4 deletions(-)
 create mode 100644 
core/src/main/java/org/apache/accumulo/core/file/rfile/RollingStats.java
 create mode 100644 
core/src/test/java/org/apache/accumulo/core/file/rfile/RolllingStatsTest.java

-- 
To stop receiving notification emails like this one, please contact
"commits@accumulo.apache.org" <commits@accumulo.apache.org>.


[accumulo] branch 1.8 updated (ed313f7 -> 06cb5ed)

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a change to branch 1.8
in repository https://gitbox.apache.org/repos/asf/accumulo.git.


from ed313f7  ACCUMULO-4744 Fixed RFile API scanner bug (#324)
 add c28e11c  ACCUMULO-4669 Use windowed statistics in RFile
 new 06cb5ed  Merge branch '1.7' into 1.8

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/accumulo/core/file/rfile/RFile.java |   9 +-
 .../accumulo/core/file/rfile/RollingStats.java | 114 
 .../core/file/rfile/RolllingStatsTest.java | 205 +
 3 files changed, 324 insertions(+), 4 deletions(-)
 create mode 100644 
core/src/main/java/org/apache/accumulo/core/file/rfile/RollingStats.java
 create mode 100644 
core/src/test/java/org/apache/accumulo/core/file/rfile/RolllingStatsTest.java

-- 
To stop receiving notification emails like this one, please contact
"commits@accumulo.apache.org" <commits@accumulo.apache.org>.


[accumulo] branch 1.7 updated: ACCUMULO-4669 Use windowed statistics in RFile

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch 1.7
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/1.7 by this push:
 new c28e11c  ACCUMULO-4669 Use windowed statistics in RFile
c28e11c is described below

commit c28e11c1079d2e1c6d68078b3328016569138fed
Author: Keith Turner 
AuthorDate: Thu Nov 30 20:21:13 2017 -0500

ACCUMULO-4669 Use windowed statistics in RFile
---
 .../org/apache/accumulo/core/file/rfile/RFile.java |   9 +-
 .../accumulo/core/file/rfile/RollingStats.java | 113 +
 .../core/file/rfile/RolllingStatsTest.java | 178 +
 3 files changed, 296 insertions(+), 4 deletions(-)

diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java 
b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index d5779ce..fe1c832 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@ -65,7 +65,6 @@ import 
org.apache.accumulo.core.iterators.system.LocalityGroupIterator.LocalityG
 import org.apache.accumulo.core.util.LocalityGroupUtil;
 import org.apache.accumulo.core.util.MutableByteSequence;
 import org.apache.commons.lang.mutable.MutableLong;
-import org.apache.commons.math.stat.descriptive.SummaryStatistics;
 import org.apache.hadoop.io.Writable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -310,7 +309,8 @@ public class RFile {
 
 private HashSet previousColumnFamilies;
 
-private SummaryStatistics keyLenStats = new SummaryStatistics();
+// Use windowed stats to fix ACCUMULO-4669
+private RollingStats keyLenStats = new RollingStats(2017);
 private double avergageKeySize = 0;
 
 public Writer(BlockFileWriter bfw, int blockSize) throws IOException {
@@ -370,8 +370,9 @@ public class RFile {
 }
 
 private boolean isGiantKey(Key k) {
-  // consider a key thats more than 3 standard deviations from previously 
seen key sizes as giant
-  return k.getSize() > keyLenStats.getMean() + 
keyLenStats.getStandardDeviation() * 3;
+  double mean = keyLenStats.getMean();
+  double stddev = keyLenStats.getStandardDeviation();
+  return k.getSize() > mean + Math.max(9 * mean, 4 * stddev);
 }
 
 @Override
diff --git 
a/core/src/main/java/org/apache/accumulo/core/file/rfile/RollingStats.java 
b/core/src/main/java/org/apache/accumulo/core/file/rfile/RollingStats.java
new file mode 100644
index 000..d223574
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RollingStats.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license
+ * agreements. See the NOTICE file distributed with this work for additional 
information regarding
+ * copyright ownership. The ASF licenses this file to You under the Apache 
License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the 
License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express
+ * or implied. See the License for the specific language governing permissions 
and limitations under
+ * the License.
+ */
+package org.apache.accumulo.core.file.rfile;
+
+import org.apache.commons.math.stat.StatUtils;
+
+/**
+ * This class supports efficient window statistics. Apache commons math3 has a 
class called DescriptiveStatistics that supports windows. DescriptiveStatistics
+ * recomputes the statistics over the entire window each time its requested. 
In a test over 1,000,000 entries with a window size of 1019 that requested stats
+ * for each entry this class took ~50ms and DescriptiveStatistics took 
~6,000ms.
+ *
+ * <p>
+ * This class may not be as accurate as DescriptiveStatistics. In unit test 
its within 1/1000 of DescriptiveStatistics.
+ */
+class RollingStats {
+  private int position;
+  private double window[];
+
+  private double average;
+  private double variance;
+  private double stddev;
+
+  // indicates if the window is full
+  private boolean windowFull;
+
+  private int recomputeCounter = 0;
+
+  RollingStats(int windowSize) {
+this.windowFull = false;
+this.position = 0;
+this.window = new double[windowSize];
+  }
+
+  /**
+   * @see <a href="http://jonisalonen.com/2014/efficient-and-accurate-rolling-standard-deviation/">Efficient
 and accurate rolling standard deviation</a>
+   */
+  private void update(double newValue, double oldValue, int windowSize) {
+double delta = newValue - oldValue;
+
+double oldAverage = average;
+average = average + delta / windowSize;
+variance += delta * (newValue - average + 

[accumulo] 01/01: Merge branch '1.7' into 1.8

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch 1.8
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 06cb5ed4299d386612fd214ad0a1ab75c156d685
Merge: ed313f7 c28e11c
Author: Keith Turner 
AuthorDate: Thu Nov 30 20:49:01 2017 -0500

Merge branch '1.7' into 1.8

 .../org/apache/accumulo/core/file/rfile/RFile.java |   9 +-
 .../accumulo/core/file/rfile/RollingStats.java | 114 
 .../core/file/rfile/RolllingStatsTest.java | 205 +
 3 files changed, 324 insertions(+), 4 deletions(-)

diff --cc core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index 4539392,fe1c832..cda246a
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@@ -327,103 -282,108 +326,105 @@@ public class RFile 
  
}
  
 -  public static class Writer implements FileSKVWriter {
 -
 -public static final int MAX_CF_IN_DLG = 1000;
 -private static final double MAX_BLOCK_MULTIPLIER = 1.1;
 -
 -private BlockFileWriter fileWriter;
 -private ABlockWriter blockWriter;
 +  private static class SampleEntry {
 +Key key;
 +Value val;
  
 -// private BlockAppender blockAppender;
 -private final long blockSize;
 -private final long maxBlockSize;
 -private final int indexBlockSize;
 -private int entries = 0;
 -
 -private ArrayList localityGroups = new 
ArrayList<>();
 -private LocalityGroupMetadata currentLocalityGroup = null;
 -private int nextBlock = 0;
 +SampleEntry(Key key, Value val) {
 +  this.key = new Key(key);
 +  this.val = new Value(val);
 +}
 +  }
  
 -private Key lastKeyInBlock = null;
 +  private static class SampleLocalityGroupWriter {
  
 -private boolean dataClosed = false;
 -private boolean closed = false;
 -private Key prevKey = new Key();
 -private boolean startedDefaultLocalityGroup = false;
 +private Sampler sampler;
  
 -private HashSet previousColumnFamilies;
 +private List entries = new ArrayList<>();
 +private long dataSize = 0;
  
 -// Use windowed stats to fix ACCUMULO-4669
 -private RollingStats keyLenStats = new RollingStats(2017);
 -private double avergageKeySize = 0;
 +private LocalityGroupWriter lgr;
  
 -public Writer(BlockFileWriter bfw, int blockSize) throws IOException {
 -  this(bfw, blockSize, (int) 
AccumuloConfiguration.getDefaultConfiguration().getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
 +public SampleLocalityGroupWriter(LocalityGroupWriter lgr, Sampler 
sampler) {
 +  this.lgr = lgr;
 +  this.sampler = sampler;
  }
  
 -public Writer(BlockFileWriter bfw, int blockSize, int indexBlockSize) 
throws IOException {
 -  this.blockSize = blockSize;
 -  this.maxBlockSize = (long) (blockSize * MAX_BLOCK_MULTIPLIER);
 -  this.indexBlockSize = indexBlockSize;
 -  this.fileWriter = bfw;
 -  this.blockWriter = null;
 -  previousColumnFamilies = new HashSet<>();
 +public void append(Key key, Value value) throws IOException {
 +  if (sampler.accept(key)) {
 +entries.add(new SampleEntry(key, value));
 +dataSize += key.getSize() + value.getSize();
 +  }
  }
  
 -@Override
 -public synchronized void close() throws IOException {
 -
 -  if (closed) {
 -return;
 +public void close() throws IOException {
 +  for (SampleEntry se : entries) {
 +lgr.append(se.key, se.val);
}
  
 -  closeData();
 +  lgr.close();
 +}
  
 -  ABlockWriter mba = fileWriter.prepareMetaBlock("RFile.index");
 +public void flushIfNeeded() throws IOException {
 +  if (dataSize > sampleBufferSize) {
 +// the reason to write out all but one key is so that closeBlock() 
can always eventually be called with true
 +List subList = entries.subList(0, entries.size() - 1);
  
 -  mba.writeInt(RINDEX_MAGIC);
 -  mba.writeInt(RINDEX_VER_7);
 -
 -  if (currentLocalityGroup != null)
 -localityGroups.add(currentLocalityGroup);
 +if (subList.size() > 0) {
 +  for (SampleEntry se : subList) {
 +lgr.append(se.key, se.val);
 +  }
  
 -  mba.writeInt(localityGroups.size());
 +  lgr.closeBlock(subList.get(subList.size() - 1).key, false);
  
 -  for (LocalityGroupMetadata lc : localityGroups) {
 -lc.write(mba);
 +  subList.clear();
 +  dataSize = 0;
 +}
}
 +}
 +  }
  
 -  mba.close();
 +  private static class LocalityGroupWriter {
  
 -  fileWriter.close();
 +private BlockFileWriter fileWriter;
 +private ABlockWriter blockWriter;
  
 -  closed = true;
 -}
 +// private BlockAppender blockAppender;
 +private final long blockSize;
 +private final long maxBlockSize;
 +private int 

[accumulo] branch master updated (41dcabb -> 345070d)

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git.


from 41dcabb  Merge branch '1.8'
 add ed313f7  ACCUMULO-4744 Fixed RFile API scanner bug (#324)
 new 345070d  Merge branch '1.8'

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../accumulo/core/client/rfile/RFileScanner.java   |  2 +-
 .../core/client/rfile/RFileSummariesRetriever.java |  6 +++---
 .../file/blockfile/impl/CachableBlockFile.java | 10 +
 .../accumulo/core/summary/SummaryReader.java   |  6 +++---
 .../accumulo/core/client/rfile/RFileTest.java  | 24 ++
 .../core/file/rfile/MultiLevelIndexTest.java   |  2 +-
 .../apache/accumulo/core/file/rfile/RFileTest.java |  5 +++--
 7 files changed, 41 insertions(+), 14 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
"commits@accumulo.apache.org" <commits@accumulo.apache.org>.


[accumulo] branch 1.8 updated: ACCUMULO-4744 Fixed RFile API scanner bug (#324)

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch 1.8
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/1.8 by this push:
 new ed313f7  ACCUMULO-4744 Fixed RFile API scanner bug (#324)
ed313f7 is described below

commit ed313f7703b49343825be26c31f6eed81bc37d83
Author: Keith Turner 
AuthorDate: Mon Nov 20 16:10:37 2017 -0500

ACCUMULO-4744 Fixed RFile API scanner bug (#324)
---
 .../accumulo/core/client/rfile/RFileScanner.java   |  3 ++-
 .../file/blockfile/impl/CachableBlockFile.java | 10 +
 .../accumulo/core/client/rfile/RFileTest.java  | 24 ++
 .../core/file/rfile/MultiLevelIndexTest.java   |  2 +-
 .../apache/accumulo/core/file/rfile/RFileTest.java |  5 +++--
 5 files changed, 36 insertions(+), 8 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java 
b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java
index 4dfba68..bc0df82 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java
@@ -265,7 +265,8 @@ class RFileScanner extends ScannerOptions implements 
Scanner {
   List> readers = new 
ArrayList<>(sources.length);
   for (int i = 0; i < sources.length; i++) {
 FSDataInputStream inputStream = (FSDataInputStream) 
sources[i].getInputStream();
-readers.add(new RFile.Reader(new CachableBlockFile.Reader(inputStream, 
sources[i].getLength(), opts.in.getConf(), dataCache, indexCache,
+
+readers.add(new RFile.Reader(new CachableBlockFile.Reader("source-" + 
i, inputStream, sources[i].getLength(), opts.in.getConf(), dataCache, 
indexCache,
 AccumuloConfiguration.getDefaultConfiguration(;
   }
 
diff --git 
a/core/src/main/java/org/apache/accumulo/core/file/blockfile/impl/CachableBlockFile.java
 
b/core/src/main/java/org/apache/accumulo/core/file/blockfile/impl/CachableBlockFile.java
index 4fa6634..3ecb5ca 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/file/blockfile/impl/CachableBlockFile.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/file/blockfile/impl/CachableBlockFile.java
@@ -147,7 +147,7 @@ public class CachableBlockFile {
   public static class Reader implements BlockFileReader {
 private final RateLimiter readLimiter;
 private BCFile.Reader _bc;
-private String fileName = "not_available";
+private final String fileName;
 private BlockCache _dCache = null;
 private BlockCache _iCache = null;
 private InputStream fin = null;
@@ -251,16 +251,18 @@ public class CachableBlockFile {
   this.readLimiter = readLimiter;
 }
 
-public  
Reader(InputStreamType fsin, long len, Configuration conf, BlockCache data, 
BlockCache index,
-AccumuloConfiguration accumuloConfiguration) throws IOException {
+public  Reader(String 
cacheId, InputStreamType fsin, long len, Configuration conf, BlockCache data,
+BlockCache index, AccumuloConfiguration accumuloConfiguration) throws 
IOException {
+  this.fileName = cacheId;
   this._dCache = data;
   this._iCache = index;
   this.readLimiter = null;
   init(fsin, len, conf, accumuloConfiguration);
 }
 
-public  
Reader(InputStreamType fsin, long len, Configuration conf,
+public  Reader(String 
cacheId, InputStreamType fsin, long len, Configuration conf,
 AccumuloConfiguration accumuloConfiguration) throws IOException {
+  this.fileName = cacheId;
   this.readLimiter = null;
   init(fsin, len, conf, accumuloConfiguration);
 }
diff --git 
a/core/src/test/java/org/apache/accumulo/core/client/rfile/RFileTest.java 
b/core/src/test/java/org/apache/accumulo/core/client/rfile/RFileTest.java
index 4993810..8748d8c 100644
--- a/core/src/test/java/org/apache/accumulo/core/client/rfile/RFileTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/client/rfile/RFileTest.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Random;
@@ -623,4 +624,27 @@ public class RFileTest {
 
.withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
 return reader;
   }
+
+  @Test
+  public void testMultipleFilesAndCache() throws Exception {
+SortedMap testData = createTestData(100, 10, 10);
+List files = Arrays.asList(createTmpTestFile(), 
createTmpTestFile(), createTmpTestFile());
+
+LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
+
+for (int i = 0; i < files.size(); i++) {
+  try (RFileWriter writer = 

[accumulo] 01/01: Merge branch '1.8'

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 345070da63a7849f8aac8d71a283836366a2fd8e
Merge: 41dcabb ed313f7
Author: Keith Turner 
AuthorDate: Thu Nov 30 15:50:49 2017 -0500

Merge branch '1.8'

 .../accumulo/core/client/rfile/RFileScanner.java   |  2 +-
 .../core/client/rfile/RFileSummariesRetriever.java |  6 +++---
 .../file/blockfile/impl/CachableBlockFile.java | 10 +
 .../accumulo/core/summary/SummaryReader.java   |  6 +++---
 .../accumulo/core/client/rfile/RFileTest.java  | 24 ++
 .../core/file/rfile/MultiLevelIndexTest.java   |  2 +-
 .../apache/accumulo/core/file/rfile/RFileTest.java |  5 +++--
 7 files changed, 41 insertions(+), 14 deletions(-)

diff --cc 
core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java
index e10c073,bc0df82..301dfc5
--- a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java
@@@ -306,8 -265,9 +306,8 @@@ class RFileScanner extends ScannerOptio
List> readers = new 
ArrayList<>(sources.length);
for (int i = 0; i < sources.length; i++) {
  FSDataInputStream inputStream = (FSDataInputStream) 
sources[i].getInputStream();
- readers.add(new RFile.Reader(new 
CachableBlockFile.Reader(inputStream, sources[i].getLength(), 
opts.in.getConf(), dataCache, indexCache,
 -
+ readers.add(new RFile.Reader(new CachableBlockFile.Reader("source-" + 
i, inputStream, sources[i].getLength(), opts.in.getConf(), dataCache, 
indexCache,
 -AccumuloConfiguration.getDefaultConfiguration(;
 +DefaultConfiguration.getInstance(;
}
  
if (getSamplerConfiguration() != null) {
diff --cc 
core/src/main/java/org/apache/accumulo/core/client/rfile/RFileSummariesRetriever.java
index 1e47f00,000..d3a83b0
mode 100644,00..100644
--- 
a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileSummariesRetriever.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileSummariesRetriever.java
@@@ -1,123 -1,0 +1,123 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + * http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +package org.apache.accumulo.core.client.rfile;
 +
 +import java.io.IOException;
 +import java.util.Collection;
 +import java.util.Collections;
 +import java.util.Objects;
 +import java.util.function.Predicate;
 +
 +import org.apache.accumulo.core.client.rfile.RFile.SummaryFSOptions;
 +import org.apache.accumulo.core.client.rfile.RFile.SummaryInputArguments;
 +import org.apache.accumulo.core.client.rfile.RFile.SummaryOptions;
 +import org.apache.accumulo.core.client.rfile.RFileScannerBuilder.InputArgs;
 +import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
 +import org.apache.accumulo.core.client.summary.Summary;
 +import org.apache.accumulo.core.conf.AccumuloConfiguration;
 +import org.apache.accumulo.core.conf.DefaultConfiguration;
 +import org.apache.accumulo.core.summary.Gatherer;
 +import org.apache.accumulo.core.summary.SummarizerFactory;
 +import org.apache.accumulo.core.summary.SummaryCollection;
 +import org.apache.accumulo.core.summary.SummaryReader;
 +import org.apache.hadoop.conf.Configuration;
 +import org.apache.hadoop.fs.FileSystem;
 +import org.apache.hadoop.io.Text;
 +
 +class RFileSummariesRetriever implements SummaryInputArguments, 
SummaryFSOptions, SummaryOptions {
 +
 +  private Predicate summarySelector = sc -> true;
 +  private Text startRow;
 +  private InputArgs in;
 +  private Text endRow;
 +
 +  @Override
 +  public SummaryOptions selectSummaries(Predicate 
summarySelector) {
 +Objects.requireNonNull(summarySelector);
 +this.summarySelector = summarySelector;
 +return this;
 +  }
 +
 +  @Override
 +  public SummaryOptions startRow(CharSequence startRow) {
 +return startRow(new Text(startRow.toString()));
 +  }
 +
 +  @Override
 +  public SummaryOptions startRow(Text startRow) {
 +Objects.requireNonNull(startRow);
 +this.startRow = startRow;
 +   

[accumulo] 01/01: Merge branch '1.8'

2017-11-30 Thread mmiller
This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 41dcabb14772b192f32b1fa67f770378c4ea068f
Merge: 03139a3 d4722bf
Author: Mike Miller 
AuthorDate: Thu Nov 30 14:18:48 2017 -0500

Merge branch '1.8'

 Conflicts:

server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java

 .../accumulo/server/tables/TableManager.java   | 20 ++-
 .../IllegalTableTransitionExceptionTest.java   | 68 ++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --cc 
server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
index cfc5c90,0777f3f..74c4f62
--- 
a/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
@@@ -125,9 -142,14 +138,14 @@@ public class TableManager 
return newState;
  }
  
+ @Override
+ public String getMessage() {
+   return message;
+ }
+ 
}
  
 -  public synchronized void transitionTableState(final String tableId, final 
TableState newState) {
 +  public synchronized void transitionTableState(final Table.ID tableId, final 
TableState newState) {
  String statePath = ZooUtil.getRoot(HdfsZooInstance.getInstance()) + 
Constants.ZTABLES + "/" + tableId + Constants.ZTABLE_STATE;
  
  try {
@@@ -162,8 -184,8 +180,8 @@@
  }
});
  } catch (Exception e) {
-   // ACCUMULO-3651 Changed level to error and added FATAL to message for 
slf4j compability
+   // ACCUMULO-3651 Changed level to error and added FATAL to message for 
slf4j compatibility
 -  log.error("FATAL Failed to transition table to state " + newState);
 +  log.error("FATAL Failed to transition table to state {}", newState);
throw new RuntimeException(e);
  }
}

-- 
To stop receiving notification emails like this one, please contact
"commits@accumulo.apache.org" <commits@accumulo.apache.org>.


[accumulo] branch master updated (03139a3 -> 41dcabb)

2017-11-30 Thread mmiller
This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git.


from 03139a3  ACCUMULO-4740 Enable GCM mode for crypto
 add e738351  ACCUMULO-4546 Create default log message for table error 
(#327)
 add 238e471  ACCUMULO-4546 Fix StringUtils import
 add d4722bf  Merge branch '1.7' into 1.8
 new 41dcabb  Merge branch '1.8'

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../accumulo/server/tables/TableManager.java   | 20 ++-
 .../IllegalTableTransitionExceptionTest.java   | 68 ++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 
server/base/src/test/java/org/apache/accumulo/server/tables/IllegalTableTransitionExceptionTest.java

-- 
To stop receiving notification emails like this one, please contact
"commits@accumulo.apache.org" <commits@accumulo.apache.org>.


[accumulo] branch master updated: ACCUMULO-4740 Enable GCM mode for crypto

2017-11-30 Thread kturner
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/master by this push:
 new 03139a3  ACCUMULO-4740 Enable GCM mode for crypto
03139a3 is described below

commit 03139a35f39b54f4bb57a4c26fba105d1dfae1f5
Author: Nick Felts <31989480+pirc...@users.noreply.github.com>
AuthorDate: Wed Nov 15 09:38:15 2017 -0500

ACCUMULO-4740 Enable GCM mode for crypto

Introduced the GCMParameterSpec constructor required by cipher
Updated IV management for AES-GCM (see Appendix A of NIST SP 800-38D)
---
 .../accumulo/core/conf/ConfigSanityCheck.java  |   2 +
 .../org/apache/accumulo/core/conf/Property.java|  13 +-
 .../accumulo/core/file/rfile/bcfile/BCFile.java|   5 +-
 .../security/crypto/CryptoModuleParameters.java|  48 -
 .../core/security/crypto/DefaultCryptoModule.java  |  73 +--
 .../accumulo/core/security/crypto/CryptoTest.java  | 221 -
 .../src/test/resources/crypto-on-accumulo-site.xml |   8 +-
 .../crypto-on-no-key-encryption-accumulo-site.xml  |   4 +
 .../org/apache/accumulo/tserver/log/DfsLogger.java |   3 +-
 9 files changed, 333 insertions(+), 44 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/conf/ConfigSanityCheck.java 
b/core/src/main/java/org/apache/accumulo/core/conf/ConfigSanityCheck.java
index baf1818..f787d5e 100644
--- a/core/src/main/java/org/apache/accumulo/core/conf/ConfigSanityCheck.java
+++ b/core/src/main/java/org/apache/accumulo/core/conf/ConfigSanityCheck.java
@@ -81,6 +81,8 @@ public class ConfigSanityCheck {
 
   if (key.equals(Property.CRYPTO_CIPHER_SUITE.getKey())) {
 cipherSuite = Objects.requireNonNull(value);
+Preconditions.checkArgument(cipherSuite.equals("NullCipher") || 
cipherSuite.split("/").length == 3,
+"Cipher suite must be NullCipher or in the form 
algorithm/mode/padding. Suite: " + cipherSuite + " is invalid.");
   }
 
   if (key.equals(Property.CRYPTO_CIPHER_KEY_ALGORITHM_NAME.getKey())) {
diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java 
b/core/src/main/java/org/apache/accumulo/core/conf/Property.java
index f09bd95..79e7b17 100644
--- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java
+++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java
@@ -49,14 +49,13 @@ public enum Property {
   + "(future) other parts of the code."),
   @Experimental
   CRYPTO_CIPHER_SUITE("crypto.cipher.suite", "NullCipher", PropertyType.STRING,
-  "Describes the cipher suite to use for rfile encryption. If a WAL cipher 
suite is not set, it will default to this value. The suite should be in the "
-  + "form of algorithm/mode/padding, e.g. AES/CBC/NoPadding"),
+  "Describes the cipher suite to use for rfile encryption. The value must 
be either NullCipher or in the form of algorithm/mode/padding, "
+  + "e.g. AES/CBC/NoPadding"),
   @Experimental
-  CRYPTO_WAL_CIPHER_SUITE(
-  "crypto.wal.cipher.suite",
-  "NullCipher",
-  PropertyType.STRING,
-  "Describes the cipher suite to use for the write-ahead log. Defaults to 
'cyrpto.cipher.suite' and will use that value for WAL encryption unless 
otherwise specified."),
+  CRYPTO_WAL_CIPHER_SUITE("crypto.wal.cipher.suite", "", PropertyType.STRING,
+  "Describes the cipher suite to use for the write-ahead log. Defaults to 
'cyrpto.cipher.suite' "
+  + "and will use that value for WAL encryption unless otherwise 
specified. Valid suite values include: an empty string, NullCipher, or a string 
the "
+  + "form of algorithm/mode/padding, e.g. AES/CBC/NOPadding"),
   @Experimental
   CRYPTO_CIPHER_KEY_ALGORITHM_NAME("crypto.cipher.key.algorithm.name", 
"NullCipher", PropertyType.STRING,
   "States the name of the algorithm used for the key for the corresponding 
cipher suite. The key type must be compatible with the cipher suite."),
diff --git 
a/core/src/main/java/org/apache/accumulo/core/file/rfile/bcfile/BCFile.java 
b/core/src/main/java/org/apache/accumulo/core/file/rfile/bcfile/BCFile.java
index f9a61a7..e745583 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/bcfile/BCFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/bcfile/BCFile.java
@@ -160,9 +160,8 @@ public final class BCFile {
 // *This* is also very important. We don't want the underlying stream 
messed with.
 cryptoParams.setRecordParametersToStream(false);
 
-// It is also important to make sure we get a new initialization 
vector on every call in here,
-// so set any existing one to null, in case we're reusing a parameters 
object for its RNG or other bits
-cryptoParams.setInitializationVector(null);
+// Create a new IV for the block or update an existing one in the case 
of GCM
+

[accumulo] branch 1.7 updated: ACCUMULO-4546 Fix StringUtils import

2017-11-30 Thread mmiller
This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a commit to branch 1.7
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/1.7 by this push:
 new 238e471  ACCUMULO-4546 Fix StringUtils import
238e471 is described below

commit 238e47116b16d7ea391860644a43f0dd44e75413
Author: Mike Miller 
AuthorDate: Thu Nov 30 13:54:42 2017 -0500

ACCUMULO-4546 Fix StringUtils import
---
 .../src/main/java/org/apache/accumulo/server/tables/TableManager.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java 
b/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
index cb207ec..0777f3f 100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
@@ -41,7 +41,7 @@ import org.apache.accumulo.server.client.HdfsZooInstance;
 import org.apache.accumulo.server.util.TablePropUtil;
 import org.apache.accumulo.server.zookeeper.ZooCache;
 import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
-import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;

-- 
To stop receiving notification emails like this one, please contact
commits@accumulo.apache.org.


[accumulo] branch 1.8 updated (029c743 -> d4722bf)

2017-11-30 Thread mmiller
This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a change to branch 1.8
in repository https://gitbox.apache.org/repos/asf/accumulo.git.


from 029c743  Merge branch '1.7' into 1.8
 add e738351  ACCUMULO-4546 Create default log message for table error 
(#327)
 add 238e471  ACCUMULO-4546 Fix StringUtils import
 new d4722bf  Merge branch '1.7' into 1.8

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../accumulo/server/tables/TableManager.java   | 20 ++-
 .../IllegalTableTransitionExceptionTest.java   | 68 ++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 
server/base/src/test/java/org/apache/accumulo/server/tables/IllegalTableTransitionExceptionTest.java

-- 
To stop receiving notification emails like this one, please contact
commits@accumulo.apache.org.


[accumulo] 01/01: Merge branch '1.7' into 1.8

2017-11-30 Thread mmiller
This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a commit to branch 1.8
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit d4722bf1b45df2b5521ad494275a36127c9394f3
Merge: 029c743 238e471
Author: Mike Miller 
AuthorDate: Thu Nov 30 13:55:38 2017 -0500

Merge branch '1.7' into 1.8

 .../accumulo/server/tables/TableManager.java   | 20 ++-
 .../IllegalTableTransitionExceptionTest.java   | 68 ++
 2 files changed, 87 insertions(+), 1 deletion(-)

-- 
To stop receiving notification emails like this one, please contact
commits@accumulo.apache.org.


[accumulo] branch 1.7 updated: ACCUMULO-4546 Create default log message for table error (#327)

2017-11-30 Thread mmiller
This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a commit to branch 1.7
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/1.7 by this push:
 new e738351  ACCUMULO-4546 Create default log message for table error 
(#327)
e738351 is described below

commit e738351fcdf2359997814c2cf105875702799265
Author: Mark Owens 
AuthorDate: Thu Nov 30 13:11:21 2017 -0500

ACCUMULO-4546 Create default log message for table error (#327)
---
 .../accumulo/server/tables/TableManager.java   | 20 ++-
 .../IllegalTableTransitionExceptionTest.java   | 68 ++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java 
b/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
index 0b23061..cb207ec 100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/tables/TableManager.java
@@ -41,6 +41,7 @@ import org.apache.accumulo.server.client.HdfsZooInstance;
 import org.apache.accumulo.server.util.TablePropUtil;
 import org.apache.accumulo.server.zookeeper.ZooCache;
 import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
@@ -115,10 +116,22 @@ public class TableManager {
 
 final TableState oldState;
 final TableState newState;
+final String message;
 
 public IllegalTableTransitionException(TableState oldState, TableState 
newState) {
+  this(oldState, newState, "");
+}
+
+public IllegalTableTransitionException(TableState oldState, TableState 
newState, String message) {
   this.oldState = oldState;
   this.newState = newState;
+
+  if (StringUtils.isNotEmpty(message))
+this.message = message;
+  else {
+String defaultMessage = "Error transitioning from " + oldState + " 
state to " + newState + " state";
+this.message = defaultMessage;
+  }
 }
 
 public TableState getOldState() {
@@ -129,6 +142,11 @@ public class TableManager {
   return newState;
 }
 
+@Override
+public String getMessage() {
+  return message;
+}
+
   }
 
   public synchronized void transitionTableState(final String tableId, final 
TableState newState) {
@@ -166,7 +184,7 @@ public class TableManager {
 }
   });
 } catch (Exception e) {
-  // ACCUMULO-3651 Changed level to error and added FATAL to message for 
slf4j compability
+  // ACCUMULO-3651 Changed level to error and added FATAL to message for 
slf4j compatibility
   log.error("FATAL Failed to transition table to state " + newState);
   throw new RuntimeException(e);
 }
diff --git 
a/server/base/src/test/java/org/apache/accumulo/server/tables/IllegalTableTransitionExceptionTest.java
 
b/server/base/src/test/java/org/apache/accumulo/server/tables/IllegalTableTransitionExceptionTest.java
new file mode 100644
index 0000000..d867ead
--- /dev/null
+++ 
b/server/base/src/test/java/org/apache/accumulo/server/tables/IllegalTableTransitionExceptionTest.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.server.tables;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.accumulo.core.master.state.tables.TableState;
+import 
org.apache.accumulo.server.tables.TableManager.IllegalTableTransitionException;
+import org.junit.Test;
+
+public class IllegalTableTransitionExceptionTest {
+
+  final TableState oldState = TableState.ONLINE;
+  final TableState newState = TableState.OFFLINE;
+  final String defaultMsg = "Error transitioning from " + oldState + " state 
to " + newState + " state";
+
+  @Test
+  public void testIllegalTableTransitionExceptionMessage() {
+String userMessage = null;
+try {
+  userMessage = "User suppled message - Exception from " + oldState + " 
state to " + newState + " state";
+  throw new