keith-turner closed pull request #737: Added Authorization Summarizer URL: https://github.com/apache/accumulo/pull/737
This PR was merged from a forked repository. Because GitHub hides the original diff of a foreign (fork) pull request once it is merged, the diff is reproduced below for the sake of provenance: diff --git a/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizer.java b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizer.java new file mode 100644 index 0000000000..efee030210 --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizer.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.accumulo.core.client.summary.summarizers; + +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; + +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.accumulo.core.client.summary.CountingSummarizer; +import org.apache.accumulo.core.data.ArrayByteSequence; +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.accumulo.core.security.ColumnVisibility.Node; + +/** + * Counts unique authorizations in column visibility labels. Leverages the superclass to defend against + * too many distinct authorizations. This class is useful for discovering what authorizations are present when the expected + * number of authorizations is small. + * + * <p> + * As an example, assume a data set of three keys with the column visibilities: + * {@code (A&C)|(A&D)}, {@code A&B}, and {@code C|E}. For these inputs, this summarizer would output: + * {@code c:A=2}, {@code c:B=1}, {@code c:C=2}, {@code c:D=1}, {@code c:E=1}. Notice that even though + * {@code A} occurred 3 times in total, it is only counted once per column visibility. + * + * <p> + * See the superclass documentation for more information about usage and configuration. + * + * @since 2.0.0 + * + * @see VisibilitySummarizer + * @see TableOperations#addSummarizers(String, + * org.apache.accumulo.core.client.summary.SummarizerConfiguration...) 
+ * @see TableOperations#summaries(String) + */ +public class AuthorizationSummarizer extends CountingSummarizer<ByteSequence> { + + @Override + protected Converter<ByteSequence> converter() { + return new AuthsConverter(); + } + + private static class AuthsConverter implements Converter<ByteSequence> { + + final int MAX_ENTRIES = 1000; + private Map<ByteSequence,Set<ByteSequence>> cache = new LinkedHashMap<ByteSequence,Set<ByteSequence>>( + MAX_ENTRIES + 1, .75F, true) { + private static final long serialVersionUID = 1L; + + // This method is called just after a new entry has been added + @Override + public boolean removeEldestEntry(Map.Entry<ByteSequence,Set<ByteSequence>> eldest) { + return size() > MAX_ENTRIES; + } + }; + + @Override + public void convert(Key k, Value v, Consumer<ByteSequence> consumer) { + ByteSequence vis = k.getColumnVisibilityData(); + + if (vis.length() > 0) { + Set<ByteSequence> auths = cache.get(vis); + if (auths == null) { + auths = findAuths(vis); + cache.put(new ArrayByteSequence(vis), auths); + } + + for (ByteSequence auth : auths) { + consumer.accept(auth); + } + } + } + + private Set<ByteSequence> findAuths(ByteSequence vis) { + HashSet<ByteSequence> auths = new HashSet<>(); + byte[] expression = vis.toArray(); + Node root = new ColumnVisibility(expression).getParseTree(); + + findAuths(root, expression, auths); + + return auths; + } + + private void findAuths(Node node, byte[] expression, HashSet<ByteSequence> auths) { + switch (node.getType()) { + case AND: + case OR: + for (Node child : node.getChildren()) { + findAuths(child, expression, auths); + } + break; + case TERM: + auths.add(node.getTerm(expression)); + break; + case EMPTY: + break; + default: + throw new IllegalArgumentException("Unknown node type " + node.getType()); + } + } + } +} diff --git a/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/VisibilitySummarizer.java 
b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/VisibilitySummarizer.java index 630d42b357..927c9c82f3 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/VisibilitySummarizer.java +++ b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/VisibilitySummarizer.java @@ -31,6 +31,7 @@ * * @since 2.0.0 * + * @see AuthorizationSummarizer * @see TableOperations#addSummarizers(String, * org.apache.accumulo.core.client.summary.SummarizerConfiguration...) * @see TableOperations#summaries(String) diff --git a/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizerTest.java b/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizerTest.java new file mode 100644 index 0000000000..5dee90aa23 --- /dev/null +++ b/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizerTest.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.accumulo.core.client.summary.summarizers; + +import static org.apache.accumulo.core.client.summary.CountingSummarizer.COUNTER_STAT_PREFIX; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.DELETES_IGNORED_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.EMITTED_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.SEEN_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_LONG_STAT; +import static org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_MANY_STAT; +import static org.junit.Assert.assertEquals; + +import java.util.HashMap; + +import org.apache.accumulo.core.client.summary.Summarizer.Collector; +import org.apache.accumulo.core.client.summary.SummarizerConfiguration; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.junit.Test; + +public class AuthorizationSummarizerTest { + + private static final Value EV = new Value(); + + @Test + public void testBasic() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(AuthorizationSummarizer.class) + .build(); + AuthorizationSummarizer authSummarizer = new AuthorizationSummarizer(); + + Collector collector = authSummarizer.collector(sc); + + collector.accept(new Key("r", "f", "q", ""), EV); + collector.accept(new Key("r", "f", "q", "A"), EV); + collector.accept(new Key("r", "f", "q", "B"), EV); + collector.accept(new Key("r", "f", "q", "A&B"), EV); + collector.accept(new Key("r", "f", "q", "(C|D)&(A|B)"), EV); + collector.accept(new Key("r", "f", "q", "(C|D)&(A|B)"), EV); + collector.accept(new Key("r", "f", "q", "(D&E)|(D&C&F)"), EV); + + HashMap<String,Long> actual = new HashMap<>(); + collector.summarize(actual::put); + + String p = COUNTER_STAT_PREFIX; + + HashMap<String,Long> expected = new HashMap<>(); + expected.put(p + "A", 4L); + expected.put(p + "B", 4L); + expected.put(p + "C", 3L); + expected.put(p 
+ "D", 3L); + expected.put(p + "E", 1L); + expected.put(p + "F", 1L); + expected.put(TOO_LONG_STAT, 0L); + expected.put(TOO_MANY_STAT, 0L); + expected.put(SEEN_STAT, 7L); + expected.put(EMITTED_STAT, 16L); + expected.put(DELETES_IGNORED_STAT, 0L); + + assertEquals(expected, actual); + } +} ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
