[2/2] accumulo git commit: Merge branch '1.8'
Merge branch '1.8' Conflicts: server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/cf69e3f6 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/cf69e3f6 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/cf69e3f6 Branch: refs/heads/master Commit: cf69e3f675b8b3445a0e01de4331f0fd16e1e43f Parents: 94cdcc4 f02b731 Author: Mike Miller Authored: Mon Mar 20 13:58:34 2017 -0400 Committer: Mike Miller Committed: Mon Mar 20 13:58:34 2017 -0400 -- .../main/java/org/apache/accumulo/server/fs/VolumeManager.java | 4 .../java/org/apache/accumulo/server/fs/VolumeManagerImpl.java | 5 + .../main/java/org/apache/accumulo/server/init/Initialize.java | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/accumulo/blob/cf69e3f6/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java -- diff --cc server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java index 09436a5,69f883f..06ece48 --- a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java +++ b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java @@@ -28,6 -27,9 +28,7 @@@ import org.apache.hadoop.fs.FSDataInput import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; - -import com.google.common.base.Optional; + import org.apache.hadoop.fs.permission.FsPermission; /** * A wrapper around multiple hadoop FileSystem objects, which are assumed to be different volumes. 
This also concentrates a bunch of meta-operations like http://git-wip-us.apache.org/repos/asf/accumulo/blob/cf69e3f6/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java -- http://git-wip-us.apache.org/repos/asf/accumulo/blob/cf69e3f6/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java --
[1/2] accumulo git commit: ACCUMULO-2806: changed permissions of /accumulo to 700
Repository: accumulo Updated Branches: refs/heads/master 94cdcc4d3 -> cf69e3f67 ACCUMULO-2806: changed permissions of /accumulo to 700 Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/f02b731a Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/f02b731a Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/f02b731a Branch: refs/heads/master Commit: f02b731a2b948266001cddb0c2ad5f1f5d25152f Parents: dba4447 Author: Mike Miller Authored: Fri Feb 24 15:32:36 2017 -0500 Committer: Mike Miller Committed: Mon Mar 20 12:52:52 2017 -0400 -- .../main/java/org/apache/accumulo/server/fs/VolumeManager.java | 4 .../java/org/apache/accumulo/server/fs/VolumeManagerImpl.java | 5 + .../main/java/org/apache/accumulo/server/init/Initialize.java | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java -- diff --git a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java index e761e4f..69f883f 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java +++ b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import com.google.common.base.Optional; +import org.apache.hadoop.fs.permission.FsPermission; /** * A wrapper around multiple hadoop FileSystem objects, which are assumed to be different volumes. 
This also concentrates a bunch of meta-operations like @@ -126,6 +127,9 @@ public interface VolumeManager { boolean mkdirs(Path directory) throws IOException; // forward to the appropriate FileSystem object + boolean mkdirs(Path path, FsPermission permission) throws IOException; + + // forward to the appropriate FileSystem object FSDataInputStream open(Path path) throws IOException; // forward to the appropriate FileSystem object, throws an exception if the paths are in different volumes http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java -- diff --git a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java index 116cb0c..4758421 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java +++ b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java @@ -274,6 +274,11 @@ public class VolumeManagerImpl implements VolumeManager { } @Override + public boolean mkdirs(Path path, FsPermission permission) throws IOException { +return getVolumeByPath(path).getFileSystem().mkdirs(path, permission); + } + + @Override public FSDataInputStream open(Path path) throws IOException { return getVolumeByPath(path).getFileSystem().open(path); } http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java -- diff --git a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java b/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java index 6531787..12c4a6e 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java +++ b/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java @@ -103,6 +103,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.zookeeper.KeeperException; @@ -402,7 +403,7 @@ public class Initialize implements KeywordExecutable { private static void initDirs(VolumeManager fs, UUID uuid, String[] baseDirs, boolean print) throws IOException { for (String baseDir : baseDirs) { - fs.mkdirs(new Path(new Path(baseDir, ServerConstants.VERSION_DIR), "" + ServerConstants.DATA_VERSION)); + fs.mkdirs(new Path(new Path(baseDir, ServerConstants.VERSION_DIR), "" + ServerConstants.DATA_VERSION), new FsPermission("700")); // create an instance id Path iidLocation = new Path(baseDir, ServerConstants.INSTANCE_ID_DIR);
accumulo git commit: ACCUMULO-2806: changed permissions of /accumulo to 700
Repository: accumulo Updated Branches: refs/heads/1.8 dba444757 -> f02b731a2 ACCUMULO-2806: changed permissions of /accumulo to 700 Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/f02b731a Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/f02b731a Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/f02b731a Branch: refs/heads/1.8 Commit: f02b731a2b948266001cddb0c2ad5f1f5d25152f Parents: dba4447 Author: Mike Miller Authored: Fri Feb 24 15:32:36 2017 -0500 Committer: Mike Miller Committed: Mon Mar 20 12:52:52 2017 -0400 -- .../main/java/org/apache/accumulo/server/fs/VolumeManager.java | 4 .../java/org/apache/accumulo/server/fs/VolumeManagerImpl.java | 5 + .../main/java/org/apache/accumulo/server/init/Initialize.java | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java -- diff --git a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java index e761e4f..69f883f 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java +++ b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManager.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import com.google.common.base.Optional; +import org.apache.hadoop.fs.permission.FsPermission; /** * A wrapper around multiple hadoop FileSystem objects, which are assumed to be different volumes. 
This also concentrates a bunch of meta-operations like @@ -126,6 +127,9 @@ public interface VolumeManager { boolean mkdirs(Path directory) throws IOException; // forward to the appropriate FileSystem object + boolean mkdirs(Path path, FsPermission permission) throws IOException; + + // forward to the appropriate FileSystem object FSDataInputStream open(Path path) throws IOException; // forward to the appropriate FileSystem object, throws an exception if the paths are in different volumes http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java -- diff --git a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java index 116cb0c..4758421 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java +++ b/server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java @@ -274,6 +274,11 @@ public class VolumeManagerImpl implements VolumeManager { } @Override + public boolean mkdirs(Path path, FsPermission permission) throws IOException { +return getVolumeByPath(path).getFileSystem().mkdirs(path, permission); + } + + @Override public FSDataInputStream open(Path path) throws IOException { return getVolumeByPath(path).getFileSystem().open(path); } http://git-wip-us.apache.org/repos/asf/accumulo/blob/f02b731a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java -- diff --git a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java b/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java index 6531787..12c4a6e 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java +++ b/server/base/src/main/java/org/apache/accumulo/server/init/Initialize.java @@ -103,6 +103,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.zookeeper.KeeperException; @@ -402,7 +403,7 @@ public class Initialize implements KeywordExecutable { private static void initDirs(VolumeManager fs, UUID uuid, String[] baseDirs, boolean print) throws IOException { for (String baseDir : baseDirs) { - fs.mkdirs(new Path(new Path(baseDir, ServerConstants.VERSION_DIR), "" + ServerConstants.DATA_VERSION)); + fs.mkdirs(new Path(new Path(baseDir, ServerConstants.VERSION_DIR), "" + ServerConstants.DATA_VERSION), new FsPermission("700")); // create an instance id Path iidLocation = new Path(baseDir, ServerConstants.INSTANCE_ID_DIR);
[1/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
Repository: accumulo Updated Branches: refs/heads/master 68ba2ef11 -> 94cdcc4d3 http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/test/src/main/java/org/apache/accumulo/test/functional/SummaryIT.java -- diff --git a/test/src/main/java/org/apache/accumulo/test/functional/SummaryIT.java b/test/src/main/java/org/apache/accumulo/test/functional/SummaryIT.java new file mode 100644 index 000..919307b --- /dev/null +++ b/test/src/main/java/org/apache/accumulo/test/functional/SummaryIT.java @@ -0,0 +1,820 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.accumulo.test.functional; + +import static java.util.function.Function.identity; +import static java.util.stream.Collectors.counting; +import static java.util.stream.Collectors.groupingBy; +import static org.apache.accumulo.test.functional.BasicSummarizer.DELETES_STAT; +import static org.apache.accumulo.test.functional.BasicSummarizer.MAX_TIMESTAMP_STAT; +import static org.apache.accumulo.test.functional.BasicSummarizer.MIN_TIMESTAMP_STAT; +import static org.apache.accumulo.test.functional.BasicSummarizer.TOTAL_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.DELETES_IGNORED_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.EMITTED_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.SEEN_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_LONG_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_MANY_STAT; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.LongSummaryStatistics; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Random; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.regex.PatternSyntaxException; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.BatchWriterConfig; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.client.MutationsRejectedException; +import org.apache.accumulo.core.client.Scanner; +import 
org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.TableOfflineException; +import org.apache.accumulo.core.client.ZooKeeperInstance; +import org.apache.accumulo.core.client.admin.CompactionConfig; +import org.apache.accumulo.core.client.admin.CompactionStrategyConfig; +import org.apache.accumulo.core.client.admin.NewTableConfiguration; +import org.apache.accumulo.core.client.impl.AccumuloServerException; +import org.apache.accumulo.core.client.security.SecurityErrorCode; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.client.summary.CounterSummary; +import org.apache.accumulo.core.client.summary.Summarizer; +import org.apache.accumulo.core.client.summary.SummarizerConfiguration; +import org.apache.accumulo.core.client.summary.Summary; +import org.apache.accumulo.core.client.summary.Summary.FileStatistics; +import org.apache.accumulo.core.client.summary.summarizers.FamilySummarizer; +import org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.iterators.Filter; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.security.TablePermission; +import org.apache.accumulo.fate.util.UtilWaitThread; +import org.apache.accumulo.harness.AccumuloClusterHarness; +import org.apache.accumulo.tserver.compaction.CompactionPlan; +import org.apache.a
[4/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java -- diff --git a/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java index 641556c..c45d91f 100644 --- a/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java +++ b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java @@ -90,6 +90,14 @@ public class TabletClientService { public java.util.List getActiveLogs(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials) throws org.apache.thrift.TException; +public org.apache.accumulo.core.data.thrift.TSummaries startGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TSummaryRequest request) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException, org.apache.thrift.TException; + +public org.apache.accumulo.core.data.thrift.TSummaries startGetSummariesForPartition(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TSummaryRequest request, int modulus, int remainder) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, org.apache.thrift.TException; + +public org.apache.accumulo.core.data.thrift.TSummaries startGetSummariesFromFiles(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TSummaryRequest request, java.util.Map> files) throws 
org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, org.apache.thrift.TException; + +public org.apache.accumulo.core.data.thrift.TSummaries contiuneGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long sessionId) throws NoSuchScanIDException, org.apache.thrift.TException; + } public interface AsyncIface extends org.apache.accumulo.core.client.impl.thrift.ClientService .AsyncIface { @@ -156,6 +164,14 @@ public class TabletClientService { public void getActiveLogs(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.thrift.async.AsyncMethodCallback> resultHandler) throws org.apache.thrift.TException; +public void startGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TSummaryRequest request, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; + +public void startGetSummariesForPartition(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TSummaryRequest request, int modulus, int remainder, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; + +public void startGetSummariesFromFiles(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TSummaryRequest request, java.util.Map> files, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; + +public void contiuneGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long sessionId, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; + } public static class Client extends 
org.apache.accumulo.core.client.impl.thrift.ClientService.Client implements Iface { @@ -934,6 +950,123 @@ public class TabletClientService { throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "getActiveLogs failed: unknown result"); } +public org.apache.accumulo.core.data.thrift.TSummaries startGetSummaries(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TSummaryRequest request) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException, org.apache.thrift.TException +{ + send_startGetSummaries(tinfo, credentials, request); + return recv_startGetSummaries(); +} + +public void send_startGetSummaries(org.apache.
[7/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaries.java -- diff --git a/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaries.java b/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaries.java new file mode 100644 index 000..1c860ff --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaries.java @@ -0,0 +1,831 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * Autogenerated by Thrift Compiler (0.10.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +package org.apache.accumulo.core.data.thrift; + +@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked", "unused"}) +@javax.annotation.Generated(value = "Autogenerated by Thrift Compiler (0.10.0)") +public class TSummaries implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TSummaries"); + + private static final org.apache.thrift.protocol.TField FINISHED_FIELD_DESC = new org.apache.thrift.protocol.TField("finished", org.apache.thrift.protocol.TType.BOOL, (short)1); + private static final org.apache.thrift.protocol.TField SESSION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionId", org.apache.thrift.protocol.TType.I64, (short)2); + private static final org.apache.thrift.protocol.TField TOTAL_FILES_FIELD_DESC = new org.apache.thrift.protocol.TField("totalFiles", org.apache.thrift.protocol.TType.I64, (short)3); + private static final org.apache.thrift.protocol.TField DELETED_FILES_FIELD_DESC = new org.apache.thrift.protocol.TField("deletedFiles", org.apache.thrift.protocol.TType.I64, (short)4); + private static final org.apache.thrift.protocol.TField SUMMARIES_FIELD_DESC = new org.apache.thrift.protocol.TField("summaries", org.apache.thrift.protocol.TType.LIST, (short)5); + + private static final org.apache.thrift.scheme.SchemeFactory STANDARD_SCHEME_FACTORY = new TSummariesStandardSchemeFactory(); + private static final org.apache.thrift.scheme.SchemeFactory TUPLE_SCHEME_FACTORY = new TSummariesTupleSchemeFactory(); + + public boolean finished; // required + public long sessionId; // required + public long totalFiles; // required + public long deletedFiles; // required + public java.util.List summaries; // required + + /** The set of fields this struct 
contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { +FINISHED((short)1, "finished"), +SESSION_ID((short)2, "sessionId"), +TOTAL_FILES((short)3, "totalFiles"), +DELETED_FILES((short)4, "deletedFiles"), +SUMMARIES((short)5, "summaries"); + +private static final java.util.Map byName = new java.util.HashMap(); + +static { + for (_Fields field : java.util.EnumSet.allOf(_Fields.class)) { +byName.put(field.getFieldName(), field); + } +} + +/** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ +public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { +case 1: // FINISHED + return FINISHED; +case 2: // SESSION_ID + return SESSION_ID; +case 3: // TOTAL_FILES + return TOTAL_FILES; +case 4: // DELETED_FILES + return DELETED_FILES; +case 5: // SUMMARIES + return SUMMARIES; +default: + return null; + } +} + +/** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ +public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new java.lang.IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; +} + +/** + * Find the _Fields constant that matches name, or null if its not found. + */ +public static _Fields findByN
[9/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
ACCUMULO-4501 ACCUMULO-96 Added Summarization closes apache/accumulo#224 closes apache/accumulo#168 Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/94cdcc4d Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/94cdcc4d Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/94cdcc4d Branch: refs/heads/master Commit: 94cdcc4d3f0a8ccf95894f206cb71e6117f4e51d Parents: 68ba2ef Author: Keith Turner Authored: Mon Mar 20 10:47:00 2017 -0400 Committer: Keith Turner Committed: Mon Mar 20 10:47:00 2017 -0400 -- .../client/admin/NewTableConfiguration.java | 49 +- .../core/client/admin/SummaryRetriever.java | 112 + .../core/client/admin/TableOperations.java | 65 + .../accumulo/core/client/impl/ServerClient.java | 36 +- .../core/client/impl/TableOperationsImpl.java | 151 +- .../client/mapred/AccumuloFileOutputFormat.java | 16 + .../mapreduce/AccumuloFileOutputFormat.java | 16 + .../lib/impl/FileOutputConfigurator.java|9 + .../core/client/mock/MockTableOperations.java | 27 + .../accumulo/core/client/rfile/RFile.java | 129 + .../core/client/rfile/RFileScannerBuilder.java |2 + .../client/rfile/RFileSummariesRetriever.java | 122 + .../accumulo/core/client/rfile/RFileWriter.java | 26 + .../core/client/rfile/RFileWriterBuilder.java | 38 +- .../core/client/summary/CounterSummary.java | 123 + .../core/client/summary/CountingSummarizer.java | 302 + .../core/client/summary/Summarizer.java | 227 + .../client/summary/SummarizerConfiguration.java | 285 + .../accumulo/core/client/summary/Summary.java | 145 + .../summary/summarizers/DeletesSummarizer.java | 75 + .../summary/summarizers/FamilySummarizer.java | 46 + .../summarizers/VisibilitySummarizer.java | 47 + .../core/compaction/CompactionSettings.java |2 + .../org/apache/accumulo/core/conf/Property.java | 22 +- .../apache/accumulo/core/conf/PropertyType.java |9 +- .../accumulo/core/data/ArrayByteSequence.java | 18 + 
.../accumulo/core/data/thrift/TRowRange.java| 521 ++ .../accumulo/core/data/thrift/TSummaries.java | 831 +++ .../data/thrift/TSummarizerConfiguration.java | 649 ++ .../accumulo/core/data/thrift/TSummary.java | 842 +++ .../core/data/thrift/TSummaryRequest.java | 760 +++ .../accumulo/core/file/BloomFilterLayer.java| 10 +- .../core/file/DispatchingFileFactory.java |7 +- .../accumulo/core/file/FileOperations.java | 18 + .../accumulo/core/file/rfile/PrintInfo.java |7 + .../core/file/rfile/RFileOperations.java|2 +- .../core/metadata/schema/MetadataScanner.java | 236 + .../core/metadata/schema/TabletMetadata.java| 182 + .../sample/impl/SamplerConfigurationImpl.java | 12 - .../core/sample/impl/SamplerFactory.java|8 +- .../accumulo/core/security/TablePermission.java |5 +- .../apache/accumulo/core/summary/Gatherer.java | 631 ++ .../summary/SummarizerConfigurationUtil.java| 128 + .../core/summary/SummarizerFactory.java | 63 + .../core/summary/SummaryCollection.java | 188 + .../accumulo/core/summary/SummaryInfo.java | 53 + .../accumulo/core/summary/SummaryReader.java| 257 + .../core/summary/SummarySerializer.java | 542 ++ .../accumulo/core/summary/SummaryWriter.java| 157 + .../thrift/TabletClientService.java | 5642 +- .../accumulo/core/util/CancelFlagFuture.java| 67 + .../core/util/CompletableFutureUtil.java| 49 + core/src/main/thrift/data.thrift| 34 + core/src/main/thrift/tabletserver.thrift|5 + .../client/impl/TableOperationsHelperTest.java | 26 + .../mapred/AccumuloFileOutputFormatTest.java| 18 + .../mapreduce/AccumuloFileOutputFormatTest.java | 18 + .../accumulo/core/client/rfile/RFileTest.java | 158 +- .../client/summary/CountingSummarizerTest.java | 259 + .../core/summary/SummaryCollectionTest.java | 72 + .../core/util/CompletableFutureUtilTest.java| 53 + .../main/asciidoc/accumulo_user_manual.asciidoc |2 + docs/src/main/asciidoc/chapters/summaries.txt | 232 + .../standalone/StandaloneAccumuloCluster.java |3 +- .../standalone/StandaloneClusterControl.java|3 +- 
.../impl/MiniAccumuloConfigImpl.java|3 +- .../server/security/SecurityOperation.java |5 + .../apache/accumulo/tserver/TabletServer.java | 115 + .../tserver/TabletServerResourceManager.java| 49 +- .../tserver/compaction/CompactionStrategy.java |1 - .../compaction/MajorCompactionRequest.java | 84 +- .../ConfigurableCompactionStrategy.java | 99 +- .../TooManyDeletesCompactionStrategy.java | 173 + .../tserver/session/SummarySession.java |
[8/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/client/summary/Summarizer.java -- diff --git a/core/src/main/java/org/apache/accumulo/core/client/summary/Summarizer.java b/core/src/main/java/org/apache/accumulo/core/client/summary/Summarizer.java new file mode 100644 index 000..fdb194b --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/client/summary/Summarizer.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.accumulo.core.client.summary; + +import java.util.Map; + +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.accumulo.core.client.rfile.RFile; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +//checkstyle and the formatter are in conflict, so turn off the formatter +//@formatter:off +/** + * + * Instances of this interface can be configured for Accumulo tables. When Accumulo compacts files, it will use this Factory to create {@link Collector} and + * {@link Combiner} objects to generate summary information about the data in the file. 
+ * + * + * In order to merge summary information from multiple files, Accumulo will use this factory to create a {@link Combiner} object. + * + * + * Below is an example of a very simple summarizer that will compute the number of deletes, total number of keys, min timestamp and max timestamp. + * + * + * + * public class BasicSummarizer implements Summarizer { + * + * public static final String DELETES_STAT = "deletes"; + * public static final String MIN_STAMP_STAT = "minStamp"; + * public static final String MAX_STAMP_STAT = "maxStamp"; + * public static final String TOTAL_STAT = "total"; + * + * @Override + * public Collector collector(SummarizerConfiguration sc) { + * return new Collector() { + * + * private long minStamp = Long.MAX_VALUE; + * private long maxStamp = Long.MIN_VALUE; + * private long deletes = 0; + * private long total = 0; + * + * @Override + * public void accept(Key k, Value v) { + * if (k.getTimestamp() < minStamp) { + * minStamp = k.getTimestamp(); + * } + * + * if (k.getTimestamp() > maxStamp) { + * maxStamp = k.getTimestamp(); + * } + * + * if (k.isDeleted()) { + * deletes++; + * } + * + * total++; + * } + * + * @Override + * public void summarize(StatisticConsumer sc) { + * sc.accept(MIN_STAMP_STAT, minStamp); + * sc.accept(MAX_STAMP_STAT, maxStamp); + * sc.accept(DELETES_STAT, deletes); + * sc.accept(TOTAL_STAT, total); + * } + * }; + * } + * + * @Override + * public Combiner combiner(SummarizerConfiguration sc) { + * return (stats1, stats2) -> { + * stats1.merge(DELETES_STAT, stats2.get(DELETES_STAT), Long::sum); + * stats1.merge(TOTAL_STAT, stats2.get(TOTAL_STAT), Long::sum); + * stats1.merge(MIN_STAMP_STAT, stats2.get(MIN_STAMP_STAT), Long::min); + * stats1.merge(MAX_STAMP_STAT, stats2.get(MAX_STAMP_STAT), Long::max); + * }; + * } + * } + * + * + * + * + * Below is an example summarizer that counts the log of the value length. 
+ * + * + * + * public class ValueLogLengthSummarizer implements Summarizer { + * + * @Override + * public Collector collector(SummarizerConfiguration sc) { + * + *return new Collector(){ + * + * long[] counts = new long[32]; + * + * @Override + * public void accept(Key k, Value v) { + *int idx; + *if(v.getSize() == 0) + * idx = 0; + *else + * idx = IntMath.log2(v.getSize(), RoundingMode.UP); //IntMath is from Guava + * + *counts[idx]++; + * } + * + * @Override + * public void summarize(StatisticConsumer sc) { + *for (int i = 0; i < counts.length; i++) { + * if(counts[i] > 0) { + *sc.accept(""+(1<
[2/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java -- diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java index 08bff26..e29d30c 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/MajorCompactionRequest.java @@ -17,9 +17,20 @@ package org.apache.accumulo.tserver.compaction; import java.io.IOException; +import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.Map; +import java.util.function.Predicate; +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat; +import org.apache.accumulo.core.client.rfile.RFile.WriterOptions; +import org.apache.accumulo.core.client.summary.Summarizer; +import org.apache.accumulo.core.client.summary.Summarizer.Combiner; +import org.apache.accumulo.core.client.summary.SummarizerConfiguration; +import org.apache.accumulo.core.client.summary.Summary; +import org.apache.accumulo.core.client.summary.Summary.FileStatistics; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.TabletId; @@ -27,11 +38,21 @@ import org.apache.accumulo.core.data.impl.KeyExtent; import org.apache.accumulo.core.data.impl.TabletIdImpl; import org.apache.accumulo.core.file.FileOperations; import org.apache.accumulo.core.file.FileSKVIterator; +import org.apache.accumulo.core.file.blockfile.cache.BlockCache; import org.apache.accumulo.core.metadata.schema.DataFileValue; +import org.apache.accumulo.core.summary.SummaryReader; +import 
org.apache.accumulo.core.summary.Gatherer; +import org.apache.accumulo.core.summary.SummarizerFactory; +import org.apache.accumulo.core.summary.SummaryCollection; +import org.apache.accumulo.core.util.CachedConfiguration; import org.apache.accumulo.server.fs.FileRef; import org.apache.accumulo.server.fs.VolumeManager; +import org.apache.accumulo.tserver.compaction.strategies.TooManyDeletesCompactionStrategy; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import com.google.common.base.Preconditions; + /** * Information that can be used to determine how a tablet is to be major compacted, if needed. */ @@ -40,18 +61,27 @@ public class MajorCompactionRequest implements Cloneable { final private MajorCompactionReason reason; final private VolumeManager volumeManager; final private AccumuloConfiguration tableConfig; + final private BlockCache indexCache; + final private BlockCache summaryCache; private Map files; - public MajorCompactionRequest(KeyExtent extent, MajorCompactionReason reason, VolumeManager manager, AccumuloConfiguration tabletConfig) { + public MajorCompactionRequest(KeyExtent extent, MajorCompactionReason reason, VolumeManager manager, AccumuloConfiguration tabletConfig, + BlockCache summaryCache, BlockCache indexCache) { this.extent = extent; this.reason = reason; this.volumeManager = manager; this.tableConfig = tabletConfig; this.files = Collections.emptyMap(); +this.summaryCache = summaryCache; +this.indexCache = indexCache; + } + + public MajorCompactionRequest(KeyExtent extent, MajorCompactionReason reason, AccumuloConfiguration tabletConfig) { +this(extent, reason, null, tabletConfig, null, null); } public MajorCompactionRequest(MajorCompactionRequest mcr) { -this(mcr.extent, mcr.reason, mcr.volumeManager, mcr.tableConfig); +this(mcr.extent, mcr.reason, mcr.volumeManager, mcr.tableConfig, mcr.summaryCache, mcr.indexCache); // know this is already unmodifiable, no need to wrap again this.files = mcr.files; } 
@@ -68,11 +98,61 @@ public class MajorCompactionRequest implements Cloneable { return files; } + /** + * Returns all summaries present in each file. + * + * + * This method can only be called from {@link CompactionStrategy#gatherInformation(MajorCompactionRequest)}. Unfortunately, {@code gatherInformation()} is not + * called before {@link CompactionStrategy#shouldCompact(MajorCompactionRequest)}. Therefore {@code shouldCompact()} should just return true when a compaction strategy + * wants to use summary information. + * + * + * When using summaries to make compaction decisions, it's important to ensure that all summary data fits in the tablet server summary cache. The size of this + * cache is configured by {@code tserver.cache.summary.size}. Also it's important to use the summarySelector predicate
[3/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/util/CancelFlagFuture.java -- diff --git a/core/src/main/java/org/apache/accumulo/core/util/CancelFlagFuture.java b/core/src/main/java/org/apache/accumulo/core/util/CancelFlagFuture.java new file mode 100644 index 000..9da91c0 --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/util/CancelFlagFuture.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.accumulo.core.util; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * A simple future wrapper that will set an atomic boolean to true if a future is successfully canceled + */ +public class CancelFlagFuture implements Future { + + private Future wrappedFuture; + private AtomicBoolean cancelFlag; + + public CancelFlagFuture(Future wrappedFuture, AtomicBoolean cancelFlag) { +this.wrappedFuture = wrappedFuture; +this.cancelFlag = cancelFlag; + } + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { +boolean ret = wrappedFuture.cancel(mayInterruptIfRunning); +if (ret) { + cancelFlag.set(true); +} +return ret; + } + + @Override + public boolean isCancelled() { +return wrappedFuture.isCancelled(); + } + + @Override + public boolean isDone() { +return wrappedFuture.isDone(); + } + + @Override + public T get() throws InterruptedException, ExecutionException { +return wrappedFuture.get(); + } + + @Override + public T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { +return wrappedFuture.get(timeout, unit); + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/util/CompletableFutureUtil.java -- diff --git a/core/src/main/java/org/apache/accumulo/core/util/CompletableFutureUtil.java b/core/src/main/java/org/apache/accumulo/core/util/CompletableFutureUtil.java new file mode 100644 index 000..c417b0f --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/util/CompletableFutureUtil.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.accumulo.core.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.function.BiFunction; +import java.util.function.Supplier; + +public class CompletableFutureUtil { + + // create a binary tree of completable future operations, where each node in the tree merges the results of their children when complete + public static CompletableFuture merge(List> futures, BiFunction mergeFunc, Supplier nothing) { +if (futures.size() == 0) { + return CompletableFuture.completedFuture(nothing.get()); +} +while (futures.size() > 1) { + ArrayList> mergedFutures = new ArrayList<>(futures.size() / 2); + for (int i = 0; i < futures.size(); i += 2) { +if (i + 1 == futures.size()) { + mergedFutures.add(futures.get(i)); +} else { + mergedFutures.add(futures.get(i).thenCombine(futures.get(i + 1), m
[6/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaryRequest.java -- diff --git a/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaryRequest.java b/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaryRequest.java new file mode 100644 index 000..78c242e --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/data/thrift/TSummaryRequest.java @@ -0,0 +1,760 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * Autogenerated by Thrift Compiler (0.10.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +package org.apache.accumulo.core.data.thrift; + +@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked", "unused"}) +@javax.annotation.Generated(value = "Autogenerated by Thrift Compiler (0.10.0)") +public class TSummaryRequest implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TSummaryRequest"); + + private static final org.apache.thrift.protocol.TField TABLE_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("tableId", org.apache.thrift.protocol.TType.STRING, (short)1); + private static final org.apache.thrift.protocol.TField BOUNDS_FIELD_DESC = new org.apache.thrift.protocol.TField("bounds", org.apache.thrift.protocol.TType.STRUCT, (short)2); + private static final org.apache.thrift.protocol.TField SUMMARIZERS_FIELD_DESC = new org.apache.thrift.protocol.TField("summarizers", org.apache.thrift.protocol.TType.LIST, (short)3); + private static final org.apache.thrift.protocol.TField SUMMARIZER_PATTERN_FIELD_DESC = new org.apache.thrift.protocol.TField("summarizerPattern", org.apache.thrift.protocol.TType.STRING, (short)4); + + private static final org.apache.thrift.scheme.SchemeFactory STANDARD_SCHEME_FACTORY = new TSummaryRequestStandardSchemeFactory(); + private static final org.apache.thrift.scheme.SchemeFactory TUPLE_SCHEME_FACTORY = new TSummaryRequestTupleSchemeFactory(); + + public java.lang.String tableId; // required + public TRowRange bounds; // required + public java.util.List summarizers; // required + public java.lang.String summarizerPattern; // required + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { +TABLE_ID((short)1, "tableId"), +BOUNDS((short)2, "bounds"), +SUMMARIZERS((short)3, "summarizers"), +SUMMARIZER_PATTERN((short)4, "summarizerPattern"); + +private static final java.util.Map byName = new java.util.HashMap(); + +static { + for (_Fields field : java.util.EnumSet.allOf(_Fields.class)) { +byName.put(field.getFieldName(), field); + } +} + +/** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ +public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { +case 1: // TABLE_ID + return TABLE_ID; +case 2: // BOUNDS + return BOUNDS; +case 3: // SUMMARIZERS + return SUMMARIZERS; +case 4: // SUMMARIZER_PATTERN + return SUMMARIZER_PATTERN; +default: + return null; + } +} + +/** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ +public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new java.lang.IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; +} + +/** + * Find the _Fields constant that matches name, or null if its not found. + */ +public static _Fields findByName(java.lang.String name) { + return byName.get(name); +} + +private final short _thriftId; +private final java.lang.String _fieldName; + +_Fields(short thriftId, java.lang.String fieldName) { + _thriftId
[5/9] accumulo git commit: ACCUMULO-4501 ACCUMULO-96 Added Summarization
http://git-wip-us.apache.org/repos/asf/accumulo/blob/94cdcc4d/core/src/main/java/org/apache/accumulo/core/summary/SummaryCollection.java -- diff --git a/core/src/main/java/org/apache/accumulo/core/summary/SummaryCollection.java b/core/src/main/java/org/apache/accumulo/core/summary/SummaryCollection.java new file mode 100644 index 000..cc688c9 --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/summary/SummaryCollection.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.accumulo.core.summary; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.accumulo.core.client.summary.Summarizer; +import org.apache.accumulo.core.client.summary.SummarizerConfiguration; +import org.apache.accumulo.core.client.summary.Summary; +import org.apache.accumulo.core.data.thrift.TSummaries; +import org.apache.accumulo.core.data.thrift.TSummarizerConfiguration; +import org.apache.accumulo.core.data.thrift.TSummary; + +import com.google.common.base.Preconditions; + +/** + * This class facilitates merging, storing, and serializing (to/from thrift) intermediate summary information. + */ +public class SummaryCollection { + + private static class MergedSummary { +Map summary; +long filesContaining; +long filesExceedingBoundry; +long filesLarge; + +public MergedSummary(FileSummary entry) { + this.summary = entry.summary; + this.filesContaining = 1; + this.filesExceedingBoundry = entry.exceededBoundry ? 1 : 0; + this.filesLarge = entry.exceededMaxSize ? 
1 : 0; +} + +public MergedSummary(TSummary tSummary) { + this.summary = new HashMap<>(tSummary.getSummary()); + this.filesContaining = tSummary.getFilesContaining(); + this.filesExceedingBoundry = tSummary.getFilesExceeding(); + this.filesLarge = tSummary.getFilesLarge(); +} + +public void merge(MergedSummary other, SummarizerConfiguration config, SummarizerFactory factory) { + + if (summary == null && other.summary != null) { +summary = new HashMap<>(other.summary); + } else if (summary != null && other.summary != null) { +Summarizer summarizer = factory.getSummarizer(config); +summarizer.combiner(config).merge(summary, other.summary); + } + + filesContaining += other.filesContaining; + filesExceedingBoundry += other.filesExceedingBoundry; + filesLarge += other.filesLarge; +} + +public TSummary toThrift(SummarizerConfiguration key) { + TSummarizerConfiguration tsumConf = SummarizerConfigurationUtil.toThrift(key); + return new TSummary(summary, tsumConf, filesContaining, filesExceedingBoundry, filesLarge); +} + + } + + private Map mergedSummaries; + private long totalFiles; + private long deletedFiles; + + public SummaryCollection() { +mergedSummaries = new HashMap<>(); +totalFiles = 0; + } + + public SummaryCollection(TSummaries tsums) { +mergedSummaries = new HashMap<>(); +for (TSummary tSummary : tsums.getSummaries()) { + SummarizerConfiguration sconf = SummarizerConfigurationUtil.fromThrift(tSummary.getConfig()); + mergedSummaries.put(sconf, new MergedSummary(tSummary)); +} + +totalFiles = tsums.getTotalFiles(); +deletedFiles = tsums.getDeletedFiles(); + } + + SummaryCollection(Collection initialEntries) { +this(initialEntries, false); + } + + SummaryCollection(Collection initialEntries, boolean deleted) { +if (deleted) { + Preconditions.checkArgument(initialEntries.size() == 0); +} +mergedSummaries = new HashMap<>(); +for (FileSummary entry : initialEntries) { + mergedSummaries.put(entry.conf, new MergedSummary(entry)); +} +totalFiles = 1; +this.deletedFiles = 
deleted ? 1 : 0; + } + + static class FileSummary { + +private SummarizerConfiguration conf; +private Map summary; +private boolean exceededBoundry; +private boolean exceededMaxSize; + +FileSummary(SummarizerConfiguration conf, Map summary, boolean ex