[1/2] accumulo git commit: ACCUMULO-2806: changed permissions of /accumulo to 700

2017-03-20 Thread mmiller
Repository: accumulo
Updated Branches:
  refs/heads/master 94cdcc4d3 -> cf69e3f67


ACCUMULO-2806: changed permissions of /accumulo to 700


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/f02b731a
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/f02b731a
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/f02b731a

Branch: refs/heads/master
Commit: f02b731a2b948266001cddb0c2ad5f1f5d25152f
Parents: dba4447
Author: Mike Miller 
Authored: Fri Feb 24 15:32:36 2017 -0500
Committer: Mike Miller 
Committed: Mon Mar 20 12:52:52 2017 -0400

--
 .../main/java/org/apache/accumulo/server/fs/VolumeManager.java  | 4 ++++
 .../java/org/apache/accumulo/server/fs/VolumeManagerImpl.java   | 5 +++++
 .../main/java/org/apache/accumulo/server/init/Initialize.java   | 3 ++-
 3 files changed, 11 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
--
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java 
b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
index e761e4f..69f883f 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 
 import com.google.common.base.Optional;
+import org.apache.hadoop.fs.permission.FsPermission;
 
 /**
  * A wrapper around multiple hadoop FileSystem objects, which are assumed to 
be different volumes. This also concentrates a bunch of meta-operations like
@@ -126,6 +127,9 @@ public interface VolumeManager {
   boolean mkdirs(Path directory) throws IOException;
 
   // forward to the appropriate FileSystem object
+  boolean mkdirs(Path path, FsPermission permission) throws IOException;
+
+  // forward to the appropriate FileSystem object
   FSDataInputStream open(Path path) throws IOException;
 
   // forward to the appropriate FileSystem object, throws an exception if the 
paths are in different volumes

http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
--
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
 
b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
index 116cb0c..4758421 100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
@@ -274,6 +274,11 @@ public class VolumeManagerImpl implements VolumeManager {
   }
 
   @Override
+  public boolean mkdirs(Path path, FsPermission permission) throws IOException 
{
+return getVolumeByPath(path).getFileSystem().mkdirs(path, permission);
+  }
+
+  @Override
   public FSDataInputStream open(Path path) throws IOException {
 return getVolumeByPath(path).getFileSystem().open(path);
   }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
--
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java 
b/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
index 6531787..12c4a6e 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
@@ -103,6 +103,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.zookeeper.KeeperException;
@@ -402,7 +403,7 @@ public class Initialize implements KeywordExecutable {
 
   private static void initDirs(VolumeManager fs, UUID uuid, String[] baseDirs, 
boolean print) throws IOException {
 for (String baseDir : baseDirs) {
-  fs.mkdirs(new Path(new Path(baseDir, ServerConstants.VERSION_DIR), "" + 
ServerConstants.DATA_VERSION));
+  fs.mkdirs(new Path(new Path(baseDir, ServerConstants.VERSION_DIR), "" + 
ServerConstants.DATA_VERSION), new FsPermission("700"));
 
   // create an instance id
   Path iidLocation = new Path(baseDir, ServerConstants.INSTANCE_ID_DIR);


[2/2] accumulo git commit: Merge branch '1.8'

2017-03-20 Thread mmiller
Merge branch '1.8'

 Conflicts:

server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/cf69e3f6
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/cf69e3f6
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/cf69e3f6

Branch: refs/heads/master
Commit: cf69e3f675b8b3445a0e01de4331f0fd16e1e43f
Parents: 94cdcc4 f02b731
Author: Mike Miller 
Authored: Mon Mar 20 13:58:34 2017 -0400
Committer: Mike Miller 
Committed: Mon Mar 20 13:58:34 2017 -0400

--
 .../main/java/org/apache/accumulo/server/fs/VolumeManager.java  | 4 ++++
 .../java/org/apache/accumulo/server/fs/VolumeManagerImpl.java   | 5 +++++
 .../main/java/org/apache/accumulo/server/init/Initialize.java   | 3 ++-
 3 files changed, 11 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/accumulo/blob/cf69e3f6/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
--
diff --cc 
server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
index 09436a5,69f883f..06ece48
--- a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
@@@ -28,6 -27,9 +28,7 @@@ import org.apache.hadoop.fs.FSDataInput
  import org.apache.hadoop.fs.FSDataOutputStream;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.Path;
 -
 -import com.google.common.base.Optional;
+ import org.apache.hadoop.fs.permission.FsPermission;
  
  /**
   * A wrapper around multiple hadoop FileSystem objects, which are assumed to 
be different volumes. This also concentrates a bunch of meta-operations like

http://git-wip-us.apache.org/repos/asf/accumulo/blob/cf69e3f6/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
--

http://git-wip-us.apache.org/repos/asf/accumulo/blob/cf69e3f6/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
--



accumulo git commit: ACCUMULO-2806: changed permissions of /accumulo to 700

2017-03-20 Thread mmiller
Repository: accumulo
Updated Branches:
  refs/heads/1.8 dba444757 -> f02b731a2


ACCUMULO-2806: changed permissions of /accumulo to 700


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/f02b731a
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/f02b731a
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/f02b731a

Branch: refs/heads/1.8
Commit: f02b731a2b948266001cddb0c2ad5f1f5d25152f
Parents: dba4447
Author: Mike Miller 
Authored: Fri Feb 24 15:32:36 2017 -0500
Committer: Mike Miller 
Committed: Mon Mar 20 12:52:52 2017 -0400

--
 .../main/java/org/apache/accumulo/server/fs/VolumeManager.java  | 4 ++++
 .../java/org/apache/accumulo/server/fs/VolumeManagerImpl.java   | 5 +++++
 .../main/java/org/apache/accumulo/server/init/Initialize.java   | 3 ++-
 3 files changed, 11 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
--
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java 
b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
index e761e4f..69f883f 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 
 import com.google.common.base.Optional;
+import org.apache.hadoop.fs.permission.FsPermission;
 
 /**
  * A wrapper around multiple hadoop FileSystem objects, which are assumed to 
be different volumes. This also concentrates a bunch of meta-operations like
@@ -126,6 +127,9 @@ public interface VolumeManager {
   boolean mkdirs(Path directory) throws IOException;
 
   // forward to the appropriate FileSystem object
+  boolean mkdirs(Path path, FsPermission permission) throws IOException;
+
+  // forward to the appropriate FileSystem object
   FSDataInputStream open(Path path) throws IOException;
 
   // forward to the appropriate FileSystem object, throws an exception if the 
paths are in different volumes

http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
--
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
 
b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
index 116cb0c..4758421 100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java
@@ -274,6 +274,11 @@ public class VolumeManagerImpl implements VolumeManager {
   }
 
   @Override
+  public boolean mkdirs(Path path, FsPermission permission) throws IOException 
{
+return getVolumeByPath(path).getFileSystem().mkdirs(path, permission);
+  }
+
+  @Override
   public FSDataInputStream open(Path path) throws IOException {
 return getVolumeByPath(path).getFileSystem().open(path);
   }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
--
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java 
b/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
index 6531787..12c4a6e 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java
@@ -103,6 +103,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.zookeeper.KeeperException;
@@ -402,7 +403,7 @@ public class Initialize implements KeywordExecutable {
 
   private static void initDirs(VolumeManager fs, UUID uuid, String[] baseDirs, 
boolean print) throws IOException {
 for (String baseDir : baseDirs) {
-  fs.mkdirs(new Path(new Path(baseDir, ServerConstants.VERSION_DIR), "" + 
ServerConstants.DATA_VERSION));
+  fs.mkdirs(new Path(new Path(baseDir, ServerConstants.VERSION_DIR), "" + 
ServerConstants.DATA_VERSION), new FsPermission("700"));
 
   // create an instance id
   Path iidLocation = new Path(baseDir, ServerConstants.INSTANCE_ID_DIR);



[1/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
Repository: accumulo
Updated Branches:
  refs/heads/master 68ba2ef11 -> 94cdcc4d3


http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/test/src/main/java/org/apache/accumulo/test/functional/SummaryIT.java
--
diff --git 
a/test/src/main/java/org/apache/accumulo/test/functional/SummaryIT.java 
b/test/src/main/java/org/apache/accumulo/test/functional/SummaryIT.java
new file mode 100644
index 0000000..919307b
--- /dev/null
+++ b/test/src/main/java/org/apache/accumulo/test/functional/SummaryIT.java
@@ -0,0 +1,820 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.test.functional;
+
+import static java.util.function.Function.identity;
+import static java.util.stream.Collectors.counting;
+import static java.util.stream.Collectors.groupingBy;
+import static org.apache.accumulo.test.functional.BasicSummarizer.DELETES_STAT;
+import static 
org.apache.accumulo.test.functional.BasicSummarizer.MAX_TIMESTAMP_STAT;
+import static 
org.apache.accumulo.test.functional.BasicSummarizer.MIN_TIMESTAMP_STAT;
+import static org.apache.accumulo.test.functional.BasicSummarizer.TOTAL_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.DELETES_IGNORED_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.EMITTED_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.SEEN_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_LONG_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_MANY_STAT;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.LongSummaryStatistics;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.regex.PatternSyntaxException;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.TableOfflineException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.admin.CompactionConfig;
+import org.apache.accumulo.core.client.admin.CompactionStrategyConfig;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.impl.AccumuloServerException;
+import org.apache.accumulo.core.client.security.SecurityErrorCode;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.client.summary.CounterSummary;
+import org.apache.accumulo.core.client.summary.Summarizer;
+import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
+import org.apache.accumulo.core.client.summary.Summary;
+import org.apache.accumulo.core.client.summary.Summary.FileStatistics;
+import org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer;
+import 
org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.Filter;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.TablePermission;
+import org.apache.accumulo.fate.util.UtilWaitThread;
+import org.apache.accumulo.harness.AccumuloClusterHarness;
+import org.apache.accumulo.tserver.compaction.CompactionPlan;
+import 

[4/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
--
diff --git 
a/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
 
b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
index 641556c..c45d91f 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
@@ -90,6 +90,14 @@ public class TabletClientService {
 
 public java.util.List 
getActiveLogs(org.apache.accumulo.core.trace.thrift.TInfo tinfo, 
org.apache.accumulo.core.security.thrift.TCredentials credentials) throws 
org.apache.thrift.TException;
 
+public org.apache.accumulo.core.data.thrift.TSummaries 
startGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, 
org.apache.accumulo.core.security.thrift.TCredentials credentials, 
org.apache.accumulo.core.data.thrift.TSummaryRequest request) throws 
org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, 
org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException, 
org.apache.thrift.TException;
+
+public org.apache.accumulo.core.data.thrift.TSummaries 
startGetSummariesForPartition(org.apache.accumulo.core.trace.thrift.TInfo 
tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, 
org.apache.accumulo.core.data.thrift.TSummaryRequest request, int modulus, int 
remainder) throws 
org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, 
org.apache.thrift.TException;
+
+public org.apache.accumulo.core.data.thrift.TSummaries 
startGetSummariesFromFiles(org.apache.accumulo.core.trace.thrift.TInfo tinfo, 
org.apache.accumulo.core.security.thrift.TCredentials credentials, 
org.apache.accumulo.core.data.thrift.TSummaryRequest request, 
java.util.Map
 files) throws 
org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, 
org.apache.thrift.TException;
+
+public org.apache.accumulo.core.data.thrift.TSummaries 
contiuneGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long 
sessionId) throws NoSuchScanIDException, org.apache.thrift.TException;
+
   }
 
   public interface AsyncIface extends 
org.apache.accumulo.core.client.impl.thrift.ClientService .AsyncIface {
@@ -156,6 +164,14 @@ public class TabletClientService {
 
 public void getActiveLogs(org.apache.accumulo.core.trace.thrift.TInfo 
tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, 
org.apache.thrift.async.AsyncMethodCallback 
resultHandler) throws org.apache.thrift.TException;
 
+public void startGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo 
tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, 
org.apache.accumulo.core.data.thrift.TSummaryRequest request, 
org.apache.thrift.async.AsyncMethodCallback
 resultHandler) throws org.apache.thrift.TException;
+
+public void 
startGetSummariesForPartition(org.apache.accumulo.core.trace.thrift.TInfo 
tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, 
org.apache.accumulo.core.data.thrift.TSummaryRequest request, int modulus, int 
remainder, 
org.apache.thrift.async.AsyncMethodCallback
 resultHandler) throws org.apache.thrift.TException;
+
+public void 
startGetSummariesFromFiles(org.apache.accumulo.core.trace.thrift.TInfo tinfo, 
org.apache.accumulo.core.security.thrift.TCredentials credentials, 
org.apache.accumulo.core.data.thrift.TSummaryRequest request, 
java.util.Map
 files, 
org.apache.thrift.async.AsyncMethodCallback
 resultHandler) throws org.apache.thrift.TException;
+
+public void 
contiuneGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long 
sessionId, 
org.apache.thrift.async.AsyncMethodCallback
 resultHandler) throws org.apache.thrift.TException;
+
   }
 
   public static class Client extends 
org.apache.accumulo.core.client.impl.thrift.ClientService.Client implements 
Iface {
@@ -934,6 +950,123 @@ public class TabletClientService {
   throw new 
org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT,
 "getActiveLogs failed: unknown result");
 }
 
+public org.apache.accumulo.core.data.thrift.TSummaries 
startGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, 
org.apache.accumulo.core.security.thrift.TCredentials credentials, 
org.apache.accumulo.core.data.thrift.TSummaryRequest request) throws 
org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, 
org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException, 
org.apache.thrift.TException
+{
+  send_startGetSummaries(tinfo, credentials, request);
+  return 

[7/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaries.java
--
diff --git 
a/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaries.java 
b/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaries.java
new file mode 100644
index 0000000..1c860ff
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaries.java
@@ -0,0 +1,831 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Autogenerated by Thrift Compiler (0.10.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+package org.apache.accumulo.core.data.thrift;
+
+@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked", "unused"})
+@javax.annotation.Generated(value = "Autogenerated by Thrift Compiler 
(0.10.0)")
+public class TSummaries implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
+  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("TSummaries");
+
+  private static final org.apache.thrift.protocol.TField FINISHED_FIELD_DESC = 
new org.apache.thrift.protocol.TField("finished", 
org.apache.thrift.protocol.TType.BOOL, (short)1);
+  private static final org.apache.thrift.protocol.TField SESSION_ID_FIELD_DESC 
= new org.apache.thrift.protocol.TField("sessionId", 
org.apache.thrift.protocol.TType.I64, (short)2);
+  private static final org.apache.thrift.protocol.TField 
TOTAL_FILES_FIELD_DESC = new org.apache.thrift.protocol.TField("totalFiles", 
org.apache.thrift.protocol.TType.I64, (short)3);
+  private static final org.apache.thrift.protocol.TField 
DELETED_FILES_FIELD_DESC = new 
org.apache.thrift.protocol.TField("deletedFiles", 
org.apache.thrift.protocol.TType.I64, (short)4);
+  private static final org.apache.thrift.protocol.TField SUMMARIES_FIELD_DESC 
= new org.apache.thrift.protocol.TField("summaries", 
org.apache.thrift.protocol.TType.LIST, (short)5);
+
+  private static final org.apache.thrift.scheme.SchemeFactory 
STANDARD_SCHEME_FACTORY = new TSummariesStandardSchemeFactory();
+  private static final org.apache.thrift.scheme.SchemeFactory 
TUPLE_SCHEME_FACTORY = new TSummariesTupleSchemeFactory();
+
+  public boolean finished; // required
+  public long sessionId; // required
+  public long totalFiles; // required
+  public long deletedFiles; // required
+  public java.util.List summaries; // required
+
+  /** The set of fields this struct contains, along with convenience methods 
for finding and manipulating them. */
+  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
+FINISHED((short)1, "finished"),
+SESSION_ID((short)2, "sessionId"),
+TOTAL_FILES((short)3, "totalFiles"),
+DELETED_FILES((short)4, "deletedFiles"),
+SUMMARIES((short)5, "summaries");
+
+private static final java.util.Map<java.lang.String, _Fields> byName = new 
java.util.HashMap<java.lang.String, _Fields>();
+
+static {
+  for (_Fields field : java.util.EnumSet.allOf(_Fields.class)) {
+byName.put(field.getFieldName(), field);
+  }
+}
+
+/**
+ * Find the _Fields constant that matches fieldId, or null if its not 
found.
+ */
+public static _Fields findByThriftId(int fieldId) {
+  switch(fieldId) {
+case 1: // FINISHED
+  return FINISHED;
+case 2: // SESSION_ID
+  return SESSION_ID;
+case 3: // TOTAL_FILES
+  return TOTAL_FILES;
+case 4: // DELETED_FILES
+  return DELETED_FILES;
+case 5: // SUMMARIES
+  return SUMMARIES;
+default:
+  return null;
+  }
+}
+
+/**
+ * Find the _Fields constant that matches fieldId, throwing an exception
+ * if it is not found.
+ */
+public static _Fields findByThriftIdOrThrow(int fieldId) {
+  _Fields fields = findByThriftId(fieldId);
+  if (fields == null) throw new java.lang.IllegalArgumentException("Field 
" + fieldId + " doesn't exist!");
+  return fields;
+}
+
+/**
+ * Find the _Fields constant 

[9/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
ACCUMULO-4501 ACCUMULO-96 Added Summarization

closes apache/accumulo#224
closes apache/accumulo#168


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/94cdcc4d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/94cdcc4d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/94cdcc4d

Branch: refs/heads/master
Commit: 94cdcc4d3f0a8ccf95894f206cb71e6117f4e51d
Parents: 68ba2ef
Author: Keith Turner 
Authored: Mon Mar 20 10:47:00 2017 -0400
Committer: Keith Turner 
Committed: Mon Mar 20 10:47:00 2017 -0400

--
 .../client/admin/NewTableConfiguration.java |   49 +-
 .../core/client/admin/SummaryRetriever.java |  112 +
 .../core/client/admin/TableOperations.java  |   65 +
 .../accumulo/core/client/impl/ServerClient.java |   36 +-
 .../core/client/impl/TableOperationsImpl.java   |  151 +-
 .../client/mapred/AccumuloFileOutputFormat.java |   16 +
 .../mapreduce/AccumuloFileOutputFormat.java |   16 +
 .../lib/impl/FileOutputConfigurator.java|9 +
 .../core/client/mock/MockTableOperations.java   |   27 +
 .../accumulo/core/client/rfile/RFile.java   |  129 +
 .../core/client/rfile/RFileScannerBuilder.java  |2 +
 .../client/rfile/RFileSummariesRetriever.java   |  122 +
 .../accumulo/core/client/rfile/RFileWriter.java |   26 +
 .../core/client/rfile/RFileWriterBuilder.java   |   38 +-
 .../core/client/summary/CounterSummary.java |  123 +
 .../core/client/summary/CountingSummarizer.java |  302 +
 .../core/client/summary/Summarizer.java |  227 +
 .../client/summary/SummarizerConfiguration.java |  285 +
 .../accumulo/core/client/summary/Summary.java   |  145 +
 .../summary/summarizers/DeletesSummarizer.java  |   75 +
 .../summary/summarizers/FamilySummarizer.java   |   46 +
 .../summarizers/VisibilitySummarizer.java   |   47 +
 .../core/compaction/CompactionSettings.java |2 +
 .../org/apache/accumulo/core/conf/Property.java |   22 +-
 .../apache/accumulo/core/conf/PropertyType.java |9 +-
 .../accumulo/core/data/ArrayByteSequence.java   |   18 +
 .../accumulo/core/data/thrift/TRowRange.java|  521 ++
 .../accumulo/core/data/thrift/TSummaries.java   |  831 +++
 .../data/thrift/TSummarizerConfiguration.java   |  649 ++
 .../accumulo/core/data/thrift/TSummary.java |  842 +++
 .../core/data/thrift/TSummaryRequest.java   |  760 +++
 .../accumulo/core/file/BloomFilterLayer.java|   10 +-
 .../core/file/DispatchingFileFactory.java   |7 +-
 .../accumulo/core/file/FileOperations.java  |   18 +
 .../accumulo/core/file/rfile/PrintInfo.java |7 +
 .../core/file/rfile/RFileOperations.java|2 +-
 .../core/metadata/schema/MetadataScanner.java   |  236 +
 .../core/metadata/schema/TabletMetadata.java|  182 +
 .../sample/impl/SamplerConfigurationImpl.java   |   12 -
 .../core/sample/impl/SamplerFactory.java|8 +-
 .../accumulo/core/security/TablePermission.java |5 +-
 .../apache/accumulo/core/summary/Gatherer.java  |  631 ++
 .../summary/SummarizerConfigurationUtil.java|  128 +
 .../core/summary/SummarizerFactory.java |   63 +
 .../core/summary/SummaryCollection.java |  188 +
 .../accumulo/core/summary/SummaryInfo.java  |   53 +
 .../accumulo/core/summary/SummaryReader.java|  257 +
 .../core/summary/SummarySerializer.java |  542 ++
 .../accumulo/core/summary/SummaryWriter.java|  157 +
 .../thrift/TabletClientService.java | 5642 +-
 .../accumulo/core/util/CancelFlagFuture.java|   67 +
 .../core/util/CompletableFutureUtil.java|   49 +
 core/src/main/thrift/data.thrift|   34 +
 core/src/main/thrift/tabletserver.thrift|5 +
 .../client/impl/TableOperationsHelperTest.java  |   26 +
 .../mapred/AccumuloFileOutputFormatTest.java|   18 +
 .../mapreduce/AccumuloFileOutputFormatTest.java |   18 +
 .../accumulo/core/client/rfile/RFileTest.java   |  158 +-
 .../client/summary/CountingSummarizerTest.java  |  259 +
 .../core/summary/SummaryCollectionTest.java |   72 +
 .../core/util/CompletableFutureUtilTest.java|   53 +
 .../main/asciidoc/accumulo_user_manual.asciidoc |2 +
 docs/src/main/asciidoc/chapters/summaries.txt   |  232 +
 .../standalone/StandaloneAccumuloCluster.java   |3 +-
 .../standalone/StandaloneClusterControl.java|3 +-
 .../impl/MiniAccumuloConfigImpl.java|3 +-
 .../server/security/SecurityOperation.java  |5 +
 .../apache/accumulo/tserver/TabletServer.java   |  115 +
 .../tserver/TabletServerResourceManager.java|   49 +-
 .../tserver/compaction/CompactionStrategy.java  |1 -
 .../compaction/MajorCompactionRequest.java  |   84 +-
 .../ConfigurableCompactionStrategy.java |   99 +-
 .../TooManyDeletesCompactionStrategy.java   |  173 +
 

[8/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/client/summary/Summarizer.java
--
diff --git 
a/core/src/main/java/org/apache/accumulo/core/client/summary/Summarizer.java 
b/core/src/main/java/org/apache/accumulo/core/client/summary/Summarizer.java
new file mode 100644
index 0000000..fdb194b
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/summary/Summarizer.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client.summary;
+
+import java.util.Map;
+
+import org.apache.accumulo.core.client.admin.TableOperations;
+import org.apache.accumulo.core.client.rfile.RFile;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+//checkstyle and the formatter are in conflict, so turn off the formatter
+//@formatter:off
+/**
+ * 
+ * Instances of this interface can be configured for Accumulo tables. When 
Accumulo compacts files, it will use this Factory to create {@link Collector} 
and
+ * {@link Combiner} objects to generate summary information about the data in 
the file.
+ *
+ * 
+ * In order to merge summary information from multiple files, Accumulo will 
use this factory to create a {@link Combiner} object.
+ *
+ * 
+ * Below is an example of a very simple summarizer that will compute the 
number of deletes, total number of keys, min timestamp and max timestamp.
+ *
+ * <pre>
+ * <code>
+ *   public class BasicSummarizer implements Summarizer {
+ *
+ *     public static final String DELETES_STAT = &quot;deletes&quot;;
+ *     public static final String MIN_STAMP_STAT = &quot;minStamp&quot;;
+ *     public static final String MAX_STAMP_STAT = &quot;maxStamp&quot;;
+ *     public static final String TOTAL_STAT = &quot;total&quot;;
+ *
+ *     &#064;Override
+ *     public Collector collector(SummarizerConfiguration sc) {
+ *       return new Collector() {
+ *
+ *         private long minStamp = Long.MAX_VALUE;
+ *         private long maxStamp = Long.MIN_VALUE;
+ *         private long deletes = 0;
+ *         private long total = 0;
+ *
+ *         &#064;Override
+ *         public void accept(Key k, Value v) {
+ *           if (k.getTimestamp() &lt; minStamp) {
+ *             minStamp = k.getTimestamp();
+ *           }
+ *
+ *           if (k.getTimestamp() &gt; maxStamp) {
+ *             maxStamp = k.getTimestamp();
+ *           }
+ *
+ *           if (k.isDeleted()) {
+ *             deletes++;
+ *           }
+ *
+ *           total++;
+ *         }
+ *
+ *         &#064;Override
+ *         public void summarize(StatisticConsumer sc) {
+ *           sc.accept(MIN_STAMP_STAT, minStamp);
+ *           sc.accept(MAX_STAMP_STAT, maxStamp);
+ *           sc.accept(DELETES_STAT, deletes);
+ *           sc.accept(TOTAL_STAT, total);
+ *         }
+ *       };
+ *     }
+ *
+ *     &#064;Override
+ *     public Combiner combiner(SummarizerConfiguration sc) {
+ *       return (stats1, stats2) -&gt; {
+ *         stats1.merge(DELETES_STAT, stats2.get(DELETES_STAT), Long::sum);
+ *         stats1.merge(TOTAL_STAT, stats2.get(TOTAL_STAT), Long::sum);
+ *         stats1.merge(MIN_STAMP_STAT, stats2.get(MIN_STAMP_STAT), Long::min);
+ *         stats1.merge(MAX_STAMP_STAT, stats2.get(MAX_STAMP_STAT), Long::max);
+ *       };
+ *     }
+ *   }
+ * </code>
+ * </pre>
+ *
+ * 
+ * Below is an example summarizer that counts the log of the value length.
+ *
+ * <pre>
+ * <code>
+ * public class ValueLogLengthSummarizer implements Summarizer {
+ *
+ *  &#064;Override
+ *  public Collector collector(SummarizerConfiguration sc) {
+ *
+ *    return new Collector(){
+ *
+ *      long[] counts = new long[32];
+ *
+ *      &#064;Override
+ *      public void accept(Key k, Value v) {
+ *        int idx;
+ *        if(v.getSize() == 0)
+ *          idx = 0;
+ *        else
+ *          idx = IntMath.log2(v.getSize(), RoundingMode.UP);  //IntMath is from Guava
+ *
+ *        counts[idx]++;
+ *      }
+ *
+ *      &#064;Override
+ *      public void summarize(StatisticConsumer sc) {
+ *        for (int i = 0; i &lt; counts.length; i++) {
+ *          if(counts[i] &gt; 0) {
+ *            sc.accept(""+(1&lt;&lt;i), counts[i]);
+ *          }
+ *        }
+ *      }
+ *    };
+ *  }
+ *
+ *  

[2/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java
--
diff --git 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java
 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java
index 08bff26..e29d30c 100644
--- 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java
+++ 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java
@@ -17,9 +17,20 @@
 package org.apache.accumulo.tserver.compaction;
 
 import java.io.IOException;
+import java.util.Collection;
 import java.util.Collections;
+import java.util.List;
 import java.util.Map;
+import java.util.function.Predicate;
 
+import org.apache.accumulo.core.client.admin.TableOperations;
+import org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat;
+import org.apache.accumulo.core.client.rfile.RFile.WriterOptions;
+import org.apache.accumulo.core.client.summary.Summarizer;
+import org.apache.accumulo.core.client.summary.Summarizer.Combiner;
+import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
+import org.apache.accumulo.core.client.summary.Summary;
+import org.apache.accumulo.core.client.summary.Summary.FileStatistics;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.TabletId;
@@ -27,11 +38,21 @@ import org.apache.accumulo.core.data.impl.KeyExtent;
 import org.apache.accumulo.core.data.impl.TabletIdImpl;
 import org.apache.accumulo.core.file.FileOperations;
 import org.apache.accumulo.core.file.FileSKVIterator;
+import org.apache.accumulo.core.file.blockfile.cache.BlockCache;
 import org.apache.accumulo.core.metadata.schema.DataFileValue;
+import org.apache.accumulo.core.summary.SummaryReader;
+import org.apache.accumulo.core.summary.Gatherer;
+import org.apache.accumulo.core.summary.SummarizerFactory;
+import org.apache.accumulo.core.summary.SummaryCollection;
+import org.apache.accumulo.core.util.CachedConfiguration;
 import org.apache.accumulo.server.fs.FileRef;
 import org.apache.accumulo.server.fs.VolumeManager;
+import 
org.apache.accumulo.tserver.compaction.strategies.TooManyDeletesCompactionStrategy;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 
+import com.google.common.base.Preconditions;
+
 /**
  * Information that can be used to determine how a tablet is to be major 
compacted, if needed.
  */
@@ -40,18 +61,27 @@ public class MajorCompactionRequest implements Cloneable {
   final private MajorCompactionReason reason;
   final private VolumeManager volumeManager;
   final private AccumuloConfiguration tableConfig;
+  final private BlockCache indexCache;
+  final private BlockCache summaryCache;
   private Map files;
 
-  public MajorCompactionRequest(KeyExtent extent, MajorCompactionReason 
reason, VolumeManager manager, AccumuloConfiguration tabletConfig) {
+  public MajorCompactionRequest(KeyExtent extent, MajorCompactionReason 
reason, VolumeManager manager, AccumuloConfiguration tabletConfig,
+  BlockCache summaryCache, BlockCache indexCache) {
 this.extent = extent;
 this.reason = reason;
 this.volumeManager = manager;
 this.tableConfig = tabletConfig;
 this.files = Collections.emptyMap();
+this.summaryCache = summaryCache;
+this.indexCache = indexCache;
+  }
+
+  public MajorCompactionRequest(KeyExtent extent, MajorCompactionReason 
reason, AccumuloConfiguration tabletConfig) {
+this(extent, reason, null, tabletConfig, null, null);
   }
 
   public MajorCompactionRequest(MajorCompactionRequest mcr) {
-this(mcr.extent, mcr.reason, mcr.volumeManager, mcr.tableConfig);
+this(mcr.extent, mcr.reason, mcr.volumeManager, mcr.tableConfig, 
mcr.summaryCache, mcr.indexCache);
 // know this is already unmodifiable, no need to wrap again
 this.files = mcr.files;
   }
@@ -68,11 +98,61 @@ public class MajorCompactionRequest implements Cloneable {
 return files;
   }
 
+  /**
+   * Returns all summaries present in each file.
+   *
+   * 
+   * This method can only be called from {@link CompactionStrategy#gatherInformation(MajorCompactionRequest)}. Unfortunately, {@code gatherInformation()} is not
+   * called before {@link CompactionStrategy#shouldCompact(MajorCompactionRequest)}. Therefore {@code shouldCompact()} should just return true when a compaction strategy
+   * wants to use summary information.
+   *
+   * 
+   * When using summaries to make compaction decisions, it's important to ensure that all summary data fits in the tablet server summary cache. The size of this
+   * cache is configured by {@code tserver.cache.summary.size}. Also it's important to use the 

[3/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/util/CancelFlagFuture.java
--
diff --git 
a/core/src/main/java/org/apache/accumulo/core/util/CancelFlagFuture.java 
b/core/src/main/java/org/apache/accumulo/core/util/CancelFlagFuture.java
new file mode 100644
index 000..9da91c0
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/util/CancelFlagFuture.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.util;
+
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * A simple future wrapper that will set an atomic boolean to true if a future 
is successfully canceled
+ */
+public class CancelFlagFuture<T> implements Future<T> {
+
+  private Future<T> wrappedFuture;
+  private AtomicBoolean cancelFlag;
+
+  public CancelFlagFuture(Future<T> wrappedFuture, AtomicBoolean cancelFlag) {
+this.wrappedFuture = wrappedFuture;
+this.cancelFlag = cancelFlag;
+  }
+
+  @Override
+  public boolean cancel(boolean mayInterruptIfRunning) {
+boolean ret = wrappedFuture.cancel(mayInterruptIfRunning);
+if (ret) {
+  cancelFlag.set(true);
+}
+return ret;
+  }
+
+  @Override
+  public boolean isCancelled() {
+return wrappedFuture.isCancelled();
+  }
+
+  @Override
+  public boolean isDone() {
+return wrappedFuture.isDone();
+  }
+
+  @Override
+  public T get() throws InterruptedException, ExecutionException {
+return wrappedFuture.get();
+  }
+
+  @Override
+  public T get(long timeout, TimeUnit unit) throws InterruptedException, 
ExecutionException, TimeoutException {
+return wrappedFuture.get(timeout, unit);
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/util/CompletableFutureUtil.java
--
diff --git 
a/core/src/main/java/org/apache/accumulo/core/util/CompletableFutureUtil.java 
b/core/src/main/java/org/apache/accumulo/core/util/CompletableFutureUtil.java
new file mode 100644
index 000..c417b0f
--- /dev/null
+++ 
b/core/src/main/java/org/apache/accumulo/core/util/CompletableFutureUtil.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.util;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.function.BiFunction;
+import java.util.function.Supplier;
+
+public class CompletableFutureUtil {
+
+  // create a binary tree of completable future operations, where each node in 
the tree merges the results of their children when complete
+  public static <T> CompletableFuture<T> merge(List<CompletableFuture<T>> futures, BiFunction<T,T,T> mergeFunc, Supplier<T> nothing) {
+if (futures.size() == 0) {
+  return CompletableFuture.completedFuture(nothing.get());
+}
+while (futures.size() > 1) {
+  ArrayList<CompletableFuture<T>> mergedFutures = new ArrayList<>(futures.size() / 2);
+  for (int i = 0; i < futures.size(); i += 2) {
+if (i + 1 == futures.size()) {
+  mergedFutures.add(futures.get(i));
+} else {
+  

[6/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaryRequest.java
--
diff --git 
a/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaryRequest.java 
b/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaryRequest.java
new file mode 100644
index 000..78c242e
--- /dev/null
+++ 
b/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaryRequest.java
@@ -0,0 +1,760 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Autogenerated by Thrift Compiler (0.10.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+package org.apache.accumulo.core.data.thrift;
+
+@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked", "unused"})
+@javax.annotation.Generated(value = "Autogenerated by Thrift Compiler 
(0.10.0)")
+public class TSummaryRequest implements org.apache.thrift.TBase<TSummaryRequest, TSummaryRequest._Fields>, java.io.Serializable, Cloneable, Comparable<TSummaryRequest> {
+  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("TSummaryRequest");
+
+  private static final org.apache.thrift.protocol.TField TABLE_ID_FIELD_DESC = 
new org.apache.thrift.protocol.TField("tableId", 
org.apache.thrift.protocol.TType.STRING, (short)1);
+  private static final org.apache.thrift.protocol.TField BOUNDS_FIELD_DESC = 
new org.apache.thrift.protocol.TField("bounds", 
org.apache.thrift.protocol.TType.STRUCT, (short)2);
+  private static final org.apache.thrift.protocol.TField 
SUMMARIZERS_FIELD_DESC = new org.apache.thrift.protocol.TField("summarizers", 
org.apache.thrift.protocol.TType.LIST, (short)3);
+  private static final org.apache.thrift.protocol.TField 
SUMMARIZER_PATTERN_FIELD_DESC = new 
org.apache.thrift.protocol.TField("summarizerPattern", 
org.apache.thrift.protocol.TType.STRING, (short)4);
+
+  private static final org.apache.thrift.scheme.SchemeFactory 
STANDARD_SCHEME_FACTORY = new TSummaryRequestStandardSchemeFactory();
+  private static final org.apache.thrift.scheme.SchemeFactory 
TUPLE_SCHEME_FACTORY = new TSummaryRequestTupleSchemeFactory();
+
+  public java.lang.String tableId; // required
+  public TRowRange bounds; // required
+  public java.util.List summarizers; // required
+  public java.lang.String summarizerPattern; // required
+
+  /** The set of fields this struct contains, along with convenience methods 
for finding and manipulating them. */
+  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
+TABLE_ID((short)1, "tableId"),
+BOUNDS((short)2, "bounds"),
+SUMMARIZERS((short)3, "summarizers"),
+SUMMARIZER_PATTERN((short)4, "summarizerPattern");
+
+private static final java.util.Map<java.lang.String, _Fields> byName = new java.util.HashMap<java.lang.String, _Fields>();
+
+static {
+  for (_Fields field : java.util.EnumSet.allOf(_Fields.class)) {
+byName.put(field.getFieldName(), field);
+  }
+}
+
+/**
+ * Find the _Fields constant that matches fieldId, or null if its not 
found.
+ */
+public static _Fields findByThriftId(int fieldId) {
+  switch(fieldId) {
+case 1: // TABLE_ID
+  return TABLE_ID;
+case 2: // BOUNDS
+  return BOUNDS;
+case 3: // SUMMARIZERS
+  return SUMMARIZERS;
+case 4: // SUMMARIZER_PATTERN
+  return SUMMARIZER_PATTERN;
+default:
+  return null;
+  }
+}
+
+/**
+ * Find the _Fields constant that matches fieldId, throwing an exception
+ * if it is not found.
+ */
+public static _Fields findByThriftIdOrThrow(int fieldId) {
+  _Fields fields = findByThriftId(fieldId);
+  if (fields == null) throw new java.lang.IllegalArgumentException("Field 
" + fieldId + " doesn't exist!");
+  return fields;
+}
+
+/**
+ * Find the _Fields constant that matches name, or null if its not found.
+ */
+public static _Fields findByName(java.lang.String name) {
+  return byName.get(name);
+}
+
+private final short _thriftId;
+private final 

[5/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization

2017-03-20 Thread kturner
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/summary/SummaryCollection.java
--
diff --git 
a/core/src/main/java/org/apache/accumulo/core/summary/SummaryCollection.java 
b/core/src/main/java/org/apache/accumulo/core/summary/SummaryCollection.java
new file mode 100644
index 000..cc688c9
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/summary/SummaryCollection.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.summary;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.summary.Summarizer;
+import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
+import org.apache.accumulo.core.client.summary.Summary;
+import org.apache.accumulo.core.data.thrift.TSummaries;
+import org.apache.accumulo.core.data.thrift.TSummarizerConfiguration;
+import org.apache.accumulo.core.data.thrift.TSummary;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class facilitates merging, storing, and serializing (to/from thrift) 
intermediate summary information.
+ */
+public class SummaryCollection {
+
+  private static class MergedSummary {
+Map<String,Long> summary;
+long filesContaining;
+long filesExceedingBoundry;
+long filesLarge;
+
+public MergedSummary(FileSummary entry) {
+  this.summary = entry.summary;
+  this.filesContaining = 1;
+  this.filesExceedingBoundry = entry.exceededBoundry ? 1 : 0;
+  this.filesLarge = entry.exceededMaxSize ? 1 : 0;
+}
+
+public MergedSummary(TSummary tSummary) {
+  this.summary = new HashMap<>(tSummary.getSummary());
+  this.filesContaining = tSummary.getFilesContaining();
+  this.filesExceedingBoundry = tSummary.getFilesExceeding();
+  this.filesLarge = tSummary.getFilesLarge();
+}
+
+public void merge(MergedSummary other, SummarizerConfiguration config, 
SummarizerFactory factory) {
+
+  if (summary == null && other.summary != null) {
+summary = new HashMap<>(other.summary);
+  } else if (summary != null && other.summary != null) {
+Summarizer summarizer = factory.getSummarizer(config);
+summarizer.combiner(config).merge(summary, other.summary);
+  }
+
+  filesContaining += other.filesContaining;
+  filesExceedingBoundry += other.filesExceedingBoundry;
+  filesLarge += other.filesLarge;
+}
+
+public TSummary toThrift(SummarizerConfiguration key) {
+  TSummarizerConfiguration tsumConf = 
SummarizerConfigurationUtil.toThrift(key);
+  return new TSummary(summary, tsumConf, filesContaining, 
filesExceedingBoundry, filesLarge);
+}
+
+  }
+
+  private Map<SummarizerConfiguration,MergedSummary> mergedSummaries;
+  private long totalFiles;
+  private long deletedFiles;
+
+  public SummaryCollection() {
+mergedSummaries = new HashMap<>();
+totalFiles = 0;
+  }
+
+  public SummaryCollection(TSummaries tsums) {
+mergedSummaries = new HashMap<>();
+for (TSummary tSummary : tsums.getSummaries()) {
+  SummarizerConfiguration sconf = 
SummarizerConfigurationUtil.fromThrift(tSummary.getConfig());
+  mergedSummaries.put(sconf, new MergedSummary(tSummary));
+}
+
+totalFiles = tsums.getTotalFiles();
+deletedFiles = tsums.getDeletedFiles();
+  }
+
+  SummaryCollection(Collection<FileSummary> initialEntries) {
+this(initialEntries, false);
+  }
+
+  SummaryCollection(Collection<FileSummary> initialEntries, boolean deleted) {
+if (deleted) {
+  Preconditions.checkArgument(initialEntries.size() == 0);
+}
+mergedSummaries = new HashMap<>();
+for (FileSummary entry : initialEntries) {
+  mergedSummaries.put(entry.conf, new MergedSummary(entry));
+}
+totalFiles = 1;
+this.deletedFiles = deleted ? 1 : 0;
+  }
+
+  static class FileSummary {
+
+private SummarizerConfiguration conf;
+private Map<String,Long> summary;
+private boolean exceededBoundry;
+private boolean exceededMaxSize;
+
+