Repository: metron Updated Branches: refs/heads/master 38d41fe67 -> ebfbe9d5e
METRON-1120 Profile 'groupBy' Expression Has No Reference to Time (nickwallen) closes apache/metron#708 Project: http://git-wip-us.apache.org/repos/asf/metron/repo Commit: http://git-wip-us.apache.org/repos/asf/metron/commit/ebfbe9d5 Tree: http://git-wip-us.apache.org/repos/asf/metron/tree/ebfbe9d5 Diff: http://git-wip-us.apache.org/repos/asf/metron/diff/ebfbe9d5 Branch: refs/heads/master Commit: ebfbe9d5edfc07109bdeaf8ccf88f4ca44221aae Parents: 38d41fe Author: nickwallen <[email protected]> Authored: Thu Aug 24 14:54:32 2017 -0400 Committer: nickallen <[email protected]> Committed: Thu Aug 24 14:54:32 2017 -0400 ---------------------------------------------------------------------- .../metron/profiler/DefaultProfileBuilder.java | 19 +++++-- .../metron/profiler/ProfileMeasurement.java | 7 ++- .../profiler/DefaultProfileBuilderTest.java | 52 +++++++++++++++++++- metron-analytics/metron-profiler/README.md | 14 ++++-- 4 files changed, 82 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/metron/blob/ebfbe9d5/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/DefaultProfileBuilder.java ---------------------------------------------------------------------- diff --git a/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/DefaultProfileBuilder.java b/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/DefaultProfileBuilder.java index 3ceeed7..1f352d0 100644 --- a/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/DefaultProfileBuilder.java +++ b/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/DefaultProfileBuilder.java @@ -22,11 +22,11 @@ package org.apache.metron.profiler; import static java.lang.String.format; -import com.google.common.collect.ImmutableMap; import java.io.Serializable; import java.lang.invoke.MethodHandles; import 
java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -147,6 +147,7 @@ public class DefaultProfileBuilder implements ProfileBuilder, Serializable { public Optional<ProfileMeasurement> flush() { LOG.debug("Flushing profile: profile={}, entity={}", profileName, entity); Optional<ProfileMeasurement> result = Optional.empty(); + ProfilePeriod period = new ProfilePeriod(clock.currentTimeMillis(), periodDurationMillis, TimeUnit.MILLISECONDS); try { // execute the 'profile' expression(s) @@ -160,21 +161,30 @@ public class DefaultProfileBuilder implements ProfileBuilder, Serializable { e -> e.getKey(), e -> execute(e.getValue(), "result/triage"))); + // the state that will be made available to the `groupBy` expression + Map<String, Object> state = new HashMap<>(); + state.put("profile", profileName); + state.put("entity", entity); + state.put("start", period.getStartTimeMillis()); + state.put("end", period.getEndTimeMillis()); + state.put("duration", period.getDurationMillis()); + state.put("result", profileValue); + // execute the 'groupBy' expression(s) - can refer to value of 'result' expression - List<Object> groups = execute(definition.getGroupBy(), ImmutableMap.of("result", profileValue), "groupBy"); + List<Object> groups = execute(definition.getGroupBy(), state, "groupBy"); result = Optional.of(new ProfileMeasurement() .withProfileName(profileName) .withEntity(entity) .withGroups(groups) - .withPeriod(clock.currentTimeMillis(), periodDurationMillis, TimeUnit.MILLISECONDS) + .withPeriod(period) .withProfileValue(profileValue) .withTriageValues(triageValues) .withDefinition(definition)); } catch(Throwable e) { // if any of the Stellar expressions fail, a measurement should NOT be returned - LOG.error(format("Unable to flush profile: %s", e.getMessage()), e); + LOG.error(format("Unable to flush profile: error=%s", e.getMessage()), e); } isInitialized = false; @@ -228,7 
+238,6 @@ public class DefaultProfileBuilder implements ProfileBuilder, Serializable { return execute(expression, Collections.emptyMap(), expressionType); } - /** * Executes a set of expressions whose results need to be assigned to a variable. * @param expressions Maps the name of a variable to the expression whose result should be assigned to it. http://git-wip-us.apache.org/repos/asf/metron/blob/ebfbe9d5/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/ProfileMeasurement.java ---------------------------------------------------------------------- diff --git a/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/ProfileMeasurement.java b/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/ProfileMeasurement.java index e9ac945..0e773e9 100644 --- a/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/ProfileMeasurement.java +++ b/metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/ProfileMeasurement.java @@ -94,7 +94,12 @@ public class ProfileMeasurement { } public ProfileMeasurement withPeriod(long whenMillis, long periodDuration, TimeUnit periodUnits) { - this.period = new ProfilePeriod(whenMillis, periodDuration, periodUnits); + this.withPeriod(new ProfilePeriod(whenMillis, periodDuration, periodUnits)); + return this; + } + + public ProfileMeasurement withPeriod(ProfilePeriod period) { + this.period = period; return this; } http://git-wip-us.apache.org/repos/asf/metron/blob/ebfbe9d5/metron-analytics/metron-profiler-common/src/test/java/org/apache/metron/profiler/DefaultProfileBuilderTest.java ---------------------------------------------------------------------- diff --git a/metron-analytics/metron-profiler-common/src/test/java/org/apache/metron/profiler/DefaultProfileBuilderTest.java b/metron-analytics/metron-profiler-common/src/test/java/org/apache/metron/profiler/DefaultProfileBuilderTest.java index 7c8664a..71ef982 
100644 --- a/metron-analytics/metron-profiler-common/src/test/java/org/apache/metron/profiler/DefaultProfileBuilderTest.java +++ b/metron-analytics/metron-profiler-common/src/test/java/org/apache/metron/profiler/DefaultProfileBuilderTest.java @@ -23,6 +23,7 @@ package org.apache.metron.profiler; import org.adrianwalker.multilinestring.Multiline; import org.apache.metron.common.configuration.profiler.ProfileConfig; import org.apache.metron.common.utils.JSONUtils; +import org.apache.metron.profiler.clock.Clock; import org.apache.metron.profiler.clock.FixedClock; import org.apache.metron.stellar.dsl.Context; import org.json.simple.JSONObject; @@ -47,7 +48,8 @@ public class DefaultProfileBuilderTest { * { * "ip_src_addr": "10.0.0.1", * "ip_dst_addr": "10.0.0.20", - * "value": 100 + * "value": 100, + * "timestamp": "2017-08-18 09:00:00" * } */ @Multiline @@ -282,6 +284,54 @@ public class DefaultProfileBuilderTest { /** * { + * "profile": "test-profile", + * "foreach": "ip_src_addr", + * "init": { "x": "100" }, + * "groupBy": ["profile","entity","start","end","duration","result"], + * "result": "100" + * } + */ + @Multiline + private String testStateAvailableToGroupBy; + + /** + * The 'groupBy' expression should be able to reference information about the profile including + * the profile name, entity name, start of period, end of period, duration, and result. 
+ */ + @Test + public void testStateAvailableToGroupBy() throws Exception { + FixedClock clock = new FixedClock(); + clock.setTime(1503081070340L); + long periodDurationMillis = TimeUnit.MINUTES.toMillis(10); + ProfilePeriod period = new ProfilePeriod(clock.currentTimeMillis(), 10, TimeUnit.MINUTES); + + // setup + definition = JSONUtils.INSTANCE.load(testStateAvailableToGroupBy, ProfileConfig.class); + builder = new DefaultProfileBuilder.Builder() + .withDefinition(definition) + .withEntity("10.0.0.1") + .withPeriodDuration(10, TimeUnit.MINUTES) + .withContext(Context.EMPTY_CONTEXT()) + .withClock(clock) + .build(); + + // execute + builder.apply(message); + Optional<ProfileMeasurement> m = builder.flush(); + assertTrue(m.isPresent()); + + // validate all values that should be accessible by the groupBy expression(s) + assertEquals(6, m.get().getGroups().size()); + assertEquals("invalid profile", "test-profile", m.get().getGroups().get(0)); + assertEquals("invalid entity", "10.0.0.1", m.get().getGroups().get(1)); + assertEquals("invalid start", period.getStartTimeMillis(), m.get().getGroups().get(2)); + assertEquals("invalid end", period.getEndTimeMillis(), m.get().getGroups().get(3)); + assertEquals("invalid duration", period.getDurationMillis(), m.get().getGroups().get(4)); + assertEquals("invalid result", 100, m.get().getGroups().get(5)); + } + + /** + * { * "profile": "test", * "foreach": "ip_src_addr", * "init": { http://git-wip-us.apache.org/repos/asf/metron/blob/ebfbe9d5/metron-analytics/metron-profiler/README.md ---------------------------------------------------------------------- diff --git a/metron-analytics/metron-profiler/README.md b/metron-analytics/metron-profiler/README.md index 66c5557..f27af59 100644 --- a/metron-analytics/metron-profiler/README.md +++ b/metron-analytics/metron-profiler/README.md @@ -191,14 +191,22 @@ An expression that determines if a message should be applied to the profile. 
A *Optional* -One or more Stellar expressions used to group the profile measurements when persisted. This is intended to sort the Profile data to allow for a contiguous scan when accessing subsets of the data. +One or more Stellar expressions used to group the profile measurements when persisted. This can be used to sort the Profile data to allow for a contiguous scan when accessing subsets of the data. This is also one way to deal with calendar effects; for example, activity on a weekday can be very different from activity on a weekend. -The 'groupBy' expressions can refer to any field within a `org.apache.metron.profiler.ProfileMeasurement`. A common use case would be grouping by day of week. This allows a contiguous scan to access all profile data for Mondays only. Using the following definition would achieve this. +A common use case would be grouping by day of week. This allows a contiguous scan to access all profile data for Mondays only. Using the following definition would achieve this. ``` -"groupBy": [ "DAY_OF_WEEK()" ] +"groupBy": [ "DAY_OF_WEEK(start)" ] ``` +The expression can reference any of these variables. +* `profile` The name of the profile. +* `entity` The name of the entity being profiled. +* `start` The start time of the profile period in epoch milliseconds. +* `end` The end time of the profile period in epoch milliseconds. +* `duration` The duration of the profile period in milliseconds. +* `result` The result of executing the `result` expression. + ### `init` *Optional*
