[
https://issues.apache.org/jira/browse/METRON-1005?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16099368#comment-16099368
]
ASF GitHub Bot commented on METRON-1005:
----------------------------------------
Github user mattf-horton commented on a diff in the pull request:
https://github.com/apache/metron/pull/622#discussion_r129187853
--- Diff:
metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/hbase/DecodableRowKeyBuilder.java
---
@@ -0,0 +1,402 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.metron.profiler.hbase;
+
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.metron.profiler.ProfileMeasurement;
+import org.apache.metron.profiler.ProfilePeriod;
+
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+import static
org.apache.metron.profiler.ProfilerClientConfig.PROFILER_PERIOD;
+import static
org.apache.metron.profiler.ProfilerClientConfig.PROFILER_PERIOD_UNITS;
+import static
org.apache.metron.profiler.ProfilerClientConfig.PROFILER_SALT_DIVISOR;
+
+/**
+ * Responsible for building the row keys used to store profile data in
HBase.
+ *
+ * This builder generates decodable row keys. A decodable row key is one
that can be interrogated to extract
+ * the constituent components of that row key. Given a previously
generated row key this builder
+ * can extract the profile name, entity name, group name(s), period
duration, and period.
+ *
+ * The row key is composed of the following fields.
+ * <ul>
+ * <li>magic number - Helps to validate the row key.</li>
+ * <li>version - The version number of the row key.</li>
+ * <li>salt - A salt that helps prevent hot-spotting.
+ * <li>profile - The name of the profile.
+ * <li>entity - The name of the entity being profiled.
+ * <li>group(s) - The group(s) used to sort the data in HBase. For
example, a group may distinguish between weekends and weekdays.
+ * <li>period - The period in which the measurement was taken. The first
period starts at the epoch and increases monotonically.
+ * </ul>
+ */
+public class DecodableRowKeyBuilder implements RowKeyBuilder {
+
+ /**
+ * Defines the byte order when encoding and decoding the row keys.
+ *
+ * Making this configurable is likely not necessary and is left as a
practice exercise for the reader. :)
+ */
+ private static final ByteOrder byteOrder = ByteOrder.BIG_ENDIAN;
+
+ /**
+ * Defines some level of sane max field length to avoid any shenanigans
with oddly encoded row keys.
+ */
+ private static final int MAX_FIELD_LENGTH = 1000;
+
+ /**
+ * A magic number embedded in each row key to help validate the row key
and byte ordering when decoding.
+ */
+ protected static final short MAGIC_NUMBER = 77;
+
+ /**
+ * The version number of the row keys supported by this builder.
+ */
+ protected static final byte VERSION = (byte) 1;
+
+ /**
+ * A salt can be prepended to the row key to help prevent hot-spotting.
The salt
+ * divisor is used to generate the salt. The salt divisor should be
roughly equal
+ * to the number of nodes in the Hbase cluster.
+ */
+ private int saltDivisor;
+
+ /**
+ * The duration of each profile period in milliseconds.
+ */
+ private long periodDurationMillis;
+
+ public DecodableRowKeyBuilder() {
+ this(PROFILER_SALT_DIVISOR.getDefault(Integer.class),
+ PROFILER_PERIOD.getDefault(Long.class),
+
TimeUnit.valueOf(PROFILER_PERIOD_UNITS.getDefault(String.class)));
+ }
+
+ public DecodableRowKeyBuilder(int saltDivisor, long duration, TimeUnit
units) {
+ this.saltDivisor = saltDivisor;
+ this.periodDurationMillis = units.toMillis(duration);
+ }
+
+ /**
+ * Builds a list of row keys necessary to retrieve profile measurements
over
+ * a time horizon.
+ *
+ * @param profile The name of the profile.
+ * @param entity The name of the entity.
+ * @param groups The group(s) used to sort the profile data.
+ * @param start When the time horizon starts in epoch milliseconds.
+ * @param end When the time horizon ends in epoch milliseconds.
+ * @return All of the row keys necessary to retrieve the profile
measurements.
+ */
+ @Override
+ public List<byte[]> encode(String profile, String entity, List<Object>
groups, long start, long end) {
+ // be forgiving of out-of-order start and end times; order is critical
to this algorithm
+ end = Math.max(start, end);
+ start = Math.min(start, end);
+
+ // find the starting period and advance until the end time is reached
+ return ProfilePeriod.visitPeriods( start
+ , end
+ , periodDurationMillis
+ , TimeUnit.MILLISECONDS
+ , Optional.empty()
+ , period -> encode(profile, entity, groups, period)
+ );
+
+ }
+
+ /**
+ * Builds a list of row keys necessary to retrieve a profile's
measurements over
+ * a time horizon.
+ * <p>
+ * This method is useful when attempting to read ProfileMeasurements
stored in HBase.
+ *
+ * @param profile The name of the profile.
+ * @param entity The name of the entity.
+ * @param groups The group(s) used to sort the profile data.
+ * @param periods The profile measurement periods to compute the
rowkeys for
+ * @return All of the row keys necessary to retrieve the profile
measurements.
+ */
+ @Override
+ public List<byte[]> encode(String profile, String entity, List<Object>
groups, Iterable<ProfilePeriod> periods) {
+ List<byte[]> rowKeys = new ArrayList<>();
+ for(ProfilePeriod period : periods) {
+ rowKeys.add(encode(profile, entity, groups, period));
+ }
+ return rowKeys;
+ }
+
+ /**
+ * Builds the row key for a given profile measurement.
+ * @param m The profile measurement.
+ * @return The HBase row key.
+ */
+ @Override
+ public byte[] encode(ProfileMeasurement m) {
+ return encode(m.getProfileName(), m.getEntity(), m.getGroups(),
m.getPeriod());
+ }
+
+ /**
+ * Build the row key.
+ * @param profile The name of the profile.
+ * @param entity The name of the entity.
+ * @param period The period in which the measurement was taken.
+ * @param groups The groups.
+ * @return The HBase row key.
+ */
+ public byte[] encode(String profile, String entity, List<Object> groups,
ProfilePeriod period) {
+
+ if(profile == null)
+ throw new IllegalArgumentException("Cannot encode row key; invalid
profile name.");
+ if(entity == null)
+ throw new IllegalArgumentException("Cannot encode row key; invalid
entity name.");
+ if(period == null)
+ throw new IllegalArgumentException("Cannot encode row key; invalid
profile period.");
+
+ long periodId = period.getPeriod();
+ long periodDurationMillis = period.getDurationMillis();
+
+ byte[] salt = encodeSalt(periodId, saltDivisor);
+ byte[] profileB = Bytes.toBytes(profile);
+ byte[] entityB = Bytes.toBytes(entity);
+ byte[] groupB = encodeGroups(groups);
+
+ int capacity = Short.BYTES + 1 + salt.length + profileB.length +
entityB.length + groupB.length + (Integer.BYTES * 3) + (Long.BYTES * 2);
+ ByteBuffer buffer = ByteBuffer
+ .allocate(capacity)
+ .order(byteOrder)
+ .putShort(MAGIC_NUMBER)
+ .put(VERSION)
+ .putInt(salt.length)
+ .put(salt)
+ .putInt(profileB.length)
+ .put(profileB)
+ .putInt(entityB.length)
+ .put(entityB)
+ .put(groupB)
--- End diff --
I understand that you don't _need_ a length for groupB, but I would recommend
that you include one, both for consistency of the length/value (L/V) encoding
and in case we later add a field after groups.
> Create Decodable Row Key for Profiler
> -------------------------------------
>
> Key: METRON-1005
> URL: https://issues.apache.org/jira/browse/METRON-1005
> Project: Metron
> Issue Type: Improvement
> Affects Versions: 0.3.0
> Reporter: Nick Allen
> Assignee: Nick Allen
> Fix For: Next + 1
>
>
> To be able to answer the types of questions that I outlined in METRON-450, we
> need a row key that is decodable. Right now there is no logic to decode a
> row key, nor is the existing row key easily decodable.
> Once the row keys can be decoded, you could scan all of the row keys in the
> Profiler's HBase table, decode each of them, and extract things like the
> names of all your profiles, the names of entities within a profile, and the
> period duration of a given profile.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)