Repository: metron Updated Branches: refs/heads/master 0bbc51d68 -> dd7118197
METRON-1142: Add Geo Hashing functions to stellar closes apache/incubator-metron#724 Project: http://git-wip-us.apache.org/repos/asf/metron/repo Commit: http://git-wip-us.apache.org/repos/asf/metron/commit/dd711819 Tree: http://git-wip-us.apache.org/repos/asf/metron/tree/dd711819 Diff: http://git-wip-us.apache.org/repos/asf/metron/diff/dd711819 Branch: refs/heads/master Commit: dd7118197ea09550192d82161cda4e0614a62b35 Parents: 0bbc51d Author: cstella <[email protected]> Authored: Fri Sep 8 09:17:58 2017 -0400 Committer: cstella <[email protected]> Committed: Fri Sep 8 09:17:58 2017 -0400 ---------------------------------------------------------------------- metron-analytics/metron-maas-service/pom.xml | 10 + metron-analytics/metron-profiler-client/pom.xml | 12 +- metron-analytics/metron-profiler/pom.xml | 10 + metron-analytics/metron-statistics/pom.xml | 10 + metron-platform/elasticsearch-shaded/pom.xml | 10 + metron-platform/metron-api/pom.xml | 10 + metron-platform/metron-common/pom.xml | 10 + metron-platform/metron-data-management/pom.xml | 12 +- metron-platform/metron-elasticsearch/pom.xml | 10 + metron-platform/metron-enrichment/pom.xml | 27 ++ .../adapters/geo/GeoLiteDatabase.java | 77 ++++- .../adapters/geo/hash/DistanceStrategies.java | 46 +++ .../adapters/geo/hash/DistanceStrategy.java | 24 ++ .../adapters/geo/hash/GeoHashUtil.java | 189 +++++++++++ .../enrichment/stellar/GeoHashFunctions.java | 299 ++++++++++++++++ .../stellar/GeoHashFunctionsTest.java | 337 +++++++++++++++++++ metron-platform/metron-hbase-client/pom.xml | 10 + metron-platform/metron-indexing/pom.xml | 10 + metron-platform/metron-management/pom.xml | 10 + metron-platform/metron-parsers/pom.xml | 10 + metron-platform/metron-pcap-backend/pom.xml | 10 + metron-platform/metron-solr/pom.xml | 10 + metron-platform/metron-writer/pom.xml | 10 + metron-stellar/stellar-common/README.md | 50 +++ metron-stellar/stellar-common/pom.xml | 10 + use-cases/README.md | 4 + 
use-cases/geographic_login_outliers/README.md | 267 +++++++++++++++ 27 files changed, 1483 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-analytics/metron-maas-service/pom.xml ---------------------------------------------------------------------- diff --git a/metron-analytics/metron-maas-service/pom.xml b/metron-analytics/metron-maas-service/pom.xml index 4eeceae..555e73d 100644 --- a/metron-analytics/metron-maas-service/pom.xml +++ b/metron-analytics/metron-maas-service/pom.xml @@ -252,6 +252,16 @@ <configuration> <shadedArtifactAttached>true</shadedArtifactAttached> <shadedClassifierName>uber</shadedClassifierName> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <transformers> <transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer"> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-analytics/metron-profiler-client/pom.xml ---------------------------------------------------------------------- diff --git a/metron-analytics/metron-profiler-client/pom.xml b/metron-analytics/metron-profiler-client/pom.xml index bba881d..69b8c29 100644 --- a/metron-analytics/metron-profiler-client/pom.xml +++ b/metron-analytics/metron-profiler-client/pom.xml @@ -304,7 +304,17 @@ <pattern>com.google.common</pattern> <shadedPattern>org.apache.metron.guava</shadedPattern> </relocation> - </relocations> + </relocations> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <transformers> <transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer"> 
http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-analytics/metron-profiler/pom.xml ---------------------------------------------------------------------- diff --git a/metron-analytics/metron-profiler/pom.xml b/metron-analytics/metron-profiler/pom.xml index 41888a1..e1ee806 100644 --- a/metron-analytics/metron-profiler/pom.xml +++ b/metron-analytics/metron-profiler/pom.xml @@ -305,6 +305,16 @@ <configuration> <shadedArtifactAttached>true</shadedArtifactAttached> <shadedClassifierName>uber</shadedClassifierName> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-analytics/metron-statistics/pom.xml ---------------------------------------------------------------------- diff --git a/metron-analytics/metron-statistics/pom.xml b/metron-analytics/metron-statistics/pom.xml index 5fab63e..b4d2ed6 100644 --- a/metron-analytics/metron-statistics/pom.xml +++ b/metron-analytics/metron-statistics/pom.xml @@ -74,6 +74,16 @@ <goal>shade</goal> </goals> <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.tdunning</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/elasticsearch-shaded/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/elasticsearch-shaded/pom.xml b/metron-platform/elasticsearch-shaded/pom.xml index bf02510..bbf96a0 100644 --- a/metron-platform/elasticsearch-shaded/pom.xml +++ b/metron-platform/elasticsearch-shaded/pom.xml @@ -89,6 +89,16 @@ <goal>shade</goal> 
</goals> <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-api/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-api/pom.xml b/metron-platform/metron-api/pom.xml index 912859d..8a15251 100644 --- a/metron-platform/metron-api/pom.xml +++ b/metron-platform/metron-api/pom.xml @@ -221,6 +221,16 @@ <goal>shade</goal> </goals> <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-common/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-common/pom.xml b/metron-platform/metron-common/pom.xml index 390ec23..9356e13 100644 --- a/metron-platform/metron-common/pom.xml +++ b/metron-platform/metron-common/pom.xml @@ -403,6 +403,16 @@ <goal>shade</goal> </goals> <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-data-management/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-data-management/pom.xml 
b/metron-platform/metron-data-management/pom.xml index 90c2c52..3fccc0a 100644 --- a/metron-platform/metron-data-management/pom.xml +++ b/metron-platform/metron-data-management/pom.xml @@ -384,7 +384,17 @@ <goal>shade</goal> </goals> <configuration> - <createDependencyReducedPom>false</createDependencyReducedPom> + <createDependencyReducedPom>false</createDependencyReducedPom> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-elasticsearch/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-elasticsearch/pom.xml b/metron-platform/metron-elasticsearch/pom.xml index 40989c6..0005484 100644 --- a/metron-platform/metron-elasticsearch/pom.xml +++ b/metron-platform/metron-elasticsearch/pom.xml @@ -242,6 +242,16 @@ <configuration> <shadedArtifactAttached>true</shadedArtifactAttached> <shadedClassifierName>uber</shadedClassifierName> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-enrichment/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-enrichment/pom.xml b/metron-platform/metron-enrichment/pom.xml index 37cb49f..dd3998b 100644 --- a/metron-platform/metron-enrichment/pom.xml +++ b/metron-platform/metron-enrichment/pom.xml @@ -94,6 +94,23 @@ <scope>provided</scope> </dependency> <dependency> + <groupId>ch.hsr</groupId> + 
<artifactId>geohash</artifactId> + <version>1.3.0</version> + </dependency> + <dependency> + <groupId>org.locationtech.spatial4j</groupId> + <artifactId>spatial4j</artifactId> + <version>0.6</version> + <exclusions> + <exclusion> + <groupId>com.vividsolutions</groupId> + <artifactId>jts-core</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> <groupId>com.maxmind.geoip2</groupId> <artifactId>geoip2</artifactId> <version>${geoip.version}</version> @@ -313,6 +330,16 @@ <configuration> <shadedArtifactAttached>true</shadedArtifactAttached> <shadedClassifierName>uber</shadedClassifierName> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.fasterxml.jackson</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/GeoLiteDatabase.java ---------------------------------------------------------------------- diff --git a/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/GeoLiteDatabase.java b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/GeoLiteDatabase.java index 0f9bf37..f5d20f7 100644 --- a/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/GeoLiteDatabase.java +++ b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/GeoLiteDatabase.java @@ -17,6 +17,7 @@ */ package org.apache.metron.enrichment.adapters.geo; +import ch.hsr.geohash.WGS84Point; import com.maxmind.db.CHMCache; import com.maxmind.geoip2.DatabaseReader; import com.maxmind.geoip2.exception.AddressNotFoundException; @@ -35,11 +36,16 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.locks.Lock; import 
java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Function; +import java.util.function.Supplier; import java.util.zip.GZIPInputStream; + +import org.apache.commons.lang3.StringUtils; import org.apache.commons.validator.routines.InetAddressValidator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.metron.stellar.common.utils.ConversionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,6 +63,42 @@ public enum GeoLiteDatabase { private static volatile String hdfsLoc = GEO_HDFS_FILE_DEFAULT; private static DatabaseReader reader = null; + public enum GeoProps { + LOC_ID("locID"), + COUNTRY("country"), + CITY("city"), + POSTAL_CODE("postalCode"), + DMA_CODE("dmaCode"), + LATITUDE("latitude"), + LONGITUDE("longitude"), + LOCATION_POINT("location_point"), + ; + Function<Map<String, String>, String> getter; + String simpleName; + + GeoProps(String simpleName) { + this(simpleName, m -> m.get(simpleName)); + } + + GeoProps(String simpleName, + Function<Map<String, String>, String> getter + ) { + this.simpleName = simpleName; + this.getter = getter; + } + public String getSimpleName() { + return simpleName; + } + + public String get(Map<String, String> map) { + return getter.apply(map); + } + + public void set(Map<String, String> map, String val) { + map.put(simpleName, val); + } + } + public synchronized void updateIfNecessary(Map<String, Object> globalConfig) { // Reload database if necessary (file changes on HDFS) LOG.trace("[Metron] Determining if GeoIpDatabase update required"); @@ -143,24 +185,24 @@ public enum GeoLiteDatabase { Postal postal = cityResponse.getPostal(); Location location = cityResponse.getLocation(); - geoInfo.put("locID", convertNullToEmptyString(city.getGeoNameId())); - geoInfo.put("country", convertNullToEmptyString(country.getIsoCode())); - geoInfo.put("city", convertNullToEmptyString(city.getName())); - 
geoInfo.put("postalCode", convertNullToEmptyString(postal.getCode())); - geoInfo.put("dmaCode", convertNullToEmptyString(location.getMetroCode())); + GeoProps.LOC_ID.set(geoInfo, convertNullToEmptyString(city.getGeoNameId())); + GeoProps.COUNTRY.set(geoInfo, convertNullToEmptyString(country.getIsoCode())); + GeoProps.CITY.set(geoInfo, convertNullToEmptyString(city.getName())); + GeoProps.POSTAL_CODE.set(geoInfo, convertNullToEmptyString(postal.getCode())); + GeoProps.DMA_CODE.set(geoInfo, convertNullToEmptyString(location.getMetroCode())); Double latitudeRaw = location.getLatitude(); String latitude = convertNullToEmptyString(latitudeRaw); - geoInfo.put("latitude", latitude); + GeoProps.LATITUDE.set(geoInfo, latitude); Double longitudeRaw = location.getLongitude(); String longitude = convertNullToEmptyString(longitudeRaw); - geoInfo.put("longitude", longitude); + GeoProps.LONGITUDE.set(geoInfo, longitude); if (latitudeRaw == null || longitudeRaw == null) { - geoInfo.put("location_point", ""); + GeoProps.LOCATION_POINT.set(geoInfo, ""); } else { - geoInfo.put("location_point", latitude + "," + longitude); + GeoProps.LOCATION_POINT.set(geoInfo, latitude + "," + longitude); } return Optional.of(geoInfo); @@ -174,6 +216,23 @@ public enum GeoLiteDatabase { return Optional.empty(); } + public Optional<WGS84Point> toPoint(Map<String, String> geoInfo) { + String latitude = GeoProps.LATITUDE.get(geoInfo); + String longitude = GeoProps.LONGITUDE.get(geoInfo); + if(latitude == null || longitude == null) { + return Optional.empty(); + } + + try { + double latD = Double.parseDouble(latitude.toString()); + double longD = Double.parseDouble(longitude.toString()); + return Optional.of(new WGS84Point(latD, longD)); + } catch (NumberFormatException nfe) { + LOG.warn(String.format("Invalid lat/long: %s/%s: %s", latitude, longitude, nfe.getMessage()), nfe); + return Optional.empty(); + } + } + protected String convertNullToEmptyString(Object raw) { return raw == null ? 
"" : String.valueOf(raw); } http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/DistanceStrategies.java ---------------------------------------------------------------------- diff --git a/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/DistanceStrategies.java b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/DistanceStrategies.java new file mode 100644 index 0000000..6af214e --- /dev/null +++ b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/DistanceStrategies.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.metron.enrichment.adapters.geo.hash; + +import ch.hsr.geohash.WGS84Point; +import org.locationtech.spatial4j.distance.DistanceUtils; + +public enum DistanceStrategies implements DistanceStrategy { + HAVERSINE((p1, p2) -> DistanceUtils.EARTH_MEAN_RADIUS_KM*DistanceUtils.distHaversineRAD( Math.toRadians(p1.getLatitude()), Math.toRadians(p1.getLongitude()) + , Math.toRadians(p2.getLatitude()), Math.toRadians(p2.getLongitude()) + ) + ), + LAW_OF_COSINES((p1, p2) -> DistanceUtils.EARTH_MEAN_RADIUS_KM*DistanceUtils.distLawOfCosinesRAD( Math.toRadians(p1.getLatitude()), Math.toRadians(p1.getLongitude()) + , Math.toRadians(p2.getLatitude()), Math.toRadians(p2.getLongitude()) + ) + ), + VICENTY((p1, p2) -> DistanceUtils.EARTH_MEAN_RADIUS_KM*DistanceUtils.distVincentyRAD( Math.toRadians(p1.getLatitude()), Math.toRadians(p1.getLongitude()) + , Math.toRadians(p2.getLatitude()), Math.toRadians(p2.getLongitude()) + ) + ) + ; + DistanceStrategy strat; + DistanceStrategies(DistanceStrategy strat) { + this.strat = strat; + } + + @Override + public double distance(WGS84Point point1, WGS84Point point2) { + return strat.distance(point1, point2); + } +} http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/DistanceStrategy.java ---------------------------------------------------------------------- diff --git a/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/DistanceStrategy.java b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/DistanceStrategy.java new file mode 100644 index 0000000..0303986 --- /dev/null +++ b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/DistanceStrategy.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.metron.enrichment.adapters.geo.hash; + +import ch.hsr.geohash.WGS84Point; + +public interface DistanceStrategy { + public double distance(WGS84Point point1, WGS84Point point2); +} http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/GeoHashUtil.java ---------------------------------------------------------------------- diff --git a/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/GeoHashUtil.java b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/GeoHashUtil.java new file mode 100644 index 0000000..902eea3 --- /dev/null +++ b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/hash/GeoHashUtil.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.metron.enrichment.adapters.geo.hash; + +import ch.hsr.geohash.GeoHash; +import ch.hsr.geohash.WGS84Point; +import com.google.common.collect.Iterables; +import org.apache.metron.enrichment.adapters.geo.GeoLiteDatabase; + +import java.util.AbstractMap; +import java.util.Map; +import java.util.Optional; + +public enum GeoHashUtil { + INSTANCE; + + public Optional<String> computeHash(Double latitude, Double longitude, int precision) { + if(latitude == null || longitude == null) { + return Optional.empty(); + } + return computeHash(new WGS84Point(latitude, longitude), precision); + } + + public Optional<String> computeHash(WGS84Point point, int precision) { + GeoHash hash = GeoHash.withCharacterPrecision(point.getLatitude(), point.getLongitude(), precision); + return Optional.of(hash.toBase32()); + } + + public Optional<String> computeHash(Map<String, String> geoLoc, int precision) { + Optional<WGS84Point> point = GeoLiteDatabase.INSTANCE.toPoint(geoLoc); + if(point.isPresent()) { + return computeHash(point.get(), precision); + } + else { + return Optional.empty(); + } + } + + public Optional<WGS84Point> toPoint(String hash) { + if(hash == null) { + return Optional.empty(); + } + GeoHash h = GeoHash.fromGeohashString(hash); + return Optional.ofNullable(h == null?null:h.getPoint()); + } + + public double distance(WGS84Point point1, WGS84Point point2, DistanceStrategy strategy) { + return strategy.distance(point1, point2); + } + + public WGS84Point centroidOfHashes(Iterable<String> hashes) { + Iterable<WGS84Point> points = 
Iterables.transform(hashes, h -> toPoint(h).orElse(null)); + return centroidOfPoints(points); + } + + public WGS84Point centroidOfPoints(Iterable<WGS84Point> points) { + Iterable<WGS84Point> nonNullPoints = Iterables.filter(points, p -> p != null); + return centroid(Iterables.transform(nonNullPoints + , p -> new AbstractMap.SimpleImmutableEntry<>(p, 1) + ) + ); + } + + public WGS84Point centroidOfWeightedPoints(Map<String, Number> points) { + + Iterable<Map.Entry<WGS84Point, Number>> weightedPoints = Iterables.transform(points.entrySet() + , kv -> { + WGS84Point pt = toPoint(kv.getKey()).orElse(null); + return new AbstractMap.SimpleImmutableEntry<>(pt, kv.getValue()); + }); + return centroid(Iterables.filter(weightedPoints, kv -> kv.getKey() != null)); + } + + /** + * Find the equilibrium point of a weighted set of lat/long geo points. + * @param points The points and their weights (e.g. multiplicity) + * @return + */ + private WGS84Point centroid(Iterable<Map.Entry<WGS84Point, Number>> points) { + double x = 0d + , y = 0d + , z = 0d + , totalWeight = 0d + ; + int n = 0; + /** + * So, it's first important to realize that long/lat are not cartesian, so simple weighted averaging + * is insufficient here as it denies the fact that we're not living on a flat square, but rather the surface of + * an ellipsoid. A crow, for instance, does not fly a straight line to an observer outside of Earth, but + * rather flies across the arc tracing the surface of earth, or a "great-earth arc". When computing the centroid + * you want to find the centroid of the points with distance defined as the great-earth arc. + * + * The general strategy is to: + * 1. Change coordinate systems from degrees on a WGS84 projection (e.g. lat/long) + * to a 3 dimensional cartesian surface atop a sphere approximating the earth. + * 2. Compute a weighted average of the cartesian coordinates + * 3. 
Change coordinate systems of the resulting centroid in cartesian space back to lat/long + * + * This is generally detailed at http://www.geomidpoint.com/example.html + */ + for(Map.Entry<WGS84Point, Number> weightedPoint : points) { + WGS84Point pt = weightedPoint.getKey(); + if(pt == null) { + continue; + } + double latRad = Math.toRadians(pt.getLatitude()); + double longRad = Math.toRadians(pt.getLongitude()); + double cosLat = Math.cos(latRad); + /* + Convert from lat/long coordinates to cartesian coordinates. The cartesian coordinate system is a right-hand, + rectangular, three-dimensional, earth-fixed coordinate system + with an origin at (0, 0, 0). The Z-axis, is parrallel to the axis of rotation of the earth. The Z-coordinate + is positive toward the North pole. The X-Y plane lies in the equatorial plane. The X-axis lies along the + intersection of the plane containing the prime meridian and the equatorial plane. The X-coordinate is positive + toward the intersection of the prime meridian and equator. + + Please see https://en.wikipedia.org/wiki/Geographic_coordinate_conversion#From_geodetic_to_ECEF_coordinates + for more information about this coordinate transformation. + */ + double ptX = cosLat * Math.cos(longRad); + double ptY = cosLat * Math.sin(longRad); + double ptZ = Math.sin(latRad); + double weight = weightedPoint.getValue().doubleValue(); + x += ptX*weight; + y += ptY*weight; + z += ptZ*weight; + n++; + totalWeight += weight; + } + if(n == 0) { + return null; + } + //average the vector representation in cartesian space, forming the center of gravity in cartesian space + x /= totalWeight; + y /= totalWeight; + z /= totalWeight; + + //convert the cartesian representation back to radians + double longitude = Math.atan2(y, x); + double hypotenuse = Math.sqrt(x*x + y*y); + double latitude = Math.atan2(z, hypotenuse); + + //convert the radians back to degrees latitude and longitude. 
+ return new WGS84Point(Math.toDegrees(latitude), Math.toDegrees(longitude)); + } + + public double maxDistanceHashes(Iterable<String> hashes, DistanceStrategy strategy) { + Iterable<WGS84Point> points = Iterables.transform(hashes, s -> toPoint(s).orElse(null)); + return maxDistancePoints(Iterables.filter(points, p -> p != null), strategy); + } + + public double maxDistancePoints(Iterable<WGS84Point> points, DistanceStrategy strategy) { + //Note: because distance is commutative, we only need search the upper triangle + int i = 0; + double max = Double.NaN; + for(WGS84Point pt1 : points) { + int j = 0; + for(WGS84Point pt2 : points) { + if(j <= i) { + double d = strategy.distance(pt1, pt2); + if(Double.isNaN(max)|| d > max) { + max = d; + } + j++; + } + else { + break; + } + } + i++; + } + return max; + } +} http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/stellar/GeoHashFunctions.java ---------------------------------------------------------------------- diff --git a/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/stellar/GeoHashFunctions.java b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/stellar/GeoHashFunctions.java new file mode 100644 index 0000000..a1e64c5 --- /dev/null +++ b/metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/stellar/GeoHashFunctions.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.metron.enrichment.stellar; + +import ch.hsr.geohash.WGS84Point; +import org.apache.metron.enrichment.adapters.geo.GeoLiteDatabase; +import org.apache.metron.enrichment.adapters.geo.hash.DistanceStrategies; +import org.apache.metron.enrichment.adapters.geo.hash.DistanceStrategy; +import org.apache.metron.enrichment.adapters.geo.hash.GeoHashUtil; +import org.apache.metron.stellar.common.utils.ConversionUtils; +import org.apache.metron.stellar.dsl.Context; +import org.apache.metron.stellar.dsl.ParseException; +import org.apache.metron.stellar.dsl.Stellar; +import org.apache.metron.stellar.dsl.StellarFunction; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.invoke.MethodHandles; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * Stellar functions in the GEOHASH namespace for encoding, decoding and comparing geohashes. + * Every function returns null when a required argument is missing or null; the optional + * character precision is converted with ConversionUtils and a precision that converts to + * null also yields a null result. + */ +public class GeoHashFunctions { + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + @Stellar(name="TO_LATLONG" + ,namespace="GEOHASH" + ,description="Compute the lat/long of a given [geohash](https://en.wikipedia.org/wiki/Geohash)" + ,params = { + "hash - The [geohash](https://en.wikipedia.org/wiki/Geohash)" + } + ,returns = "A map containing the latitude and longitude of the hash (keys \"latitude\" and \"longitude\")" + ) + public static class ToLatLong implements StellarFunction { + + @Override + public Object apply(List<Object> args, Context context) throws ParseException { + if(args.size() < 1) { + return null; + } + String hash = (String)args.get(0); + if(hash == null) 
{ + return null; + } + + Optional<WGS84Point> point = GeoHashUtil.INSTANCE.toPoint(hash); + if(point.isPresent()) { + Map<String, Object> ret = new HashMap<>(); + ret.put(GeoLiteDatabase.GeoProps.LONGITUDE.getSimpleName(), point.get().getLongitude()); + ret.put(GeoLiteDatabase.GeoProps.LATITUDE.getSimpleName(), point.get().getLatitude()); + return ret; + } + return null; + } + + @Override + public void initialize(Context context) { + + } + + @Override + public boolean isInitialized() { + return true; + } + } + + @Stellar(name="FROM_LATLONG" + ,namespace="GEOHASH" + ,description="Compute [geohash](https://en.wikipedia.org/wiki/Geohash) given a lat/long" + ,params = { + "latitude - The latitude", + "longitude - The longitude", + "character_precision? - The number of characters to use in the hash. Default is 12" + } + ,returns = "A [geohash](https://en.wikipedia.org/wiki/Geohash) of the lat/long" + ) + public static class FromLatLong implements StellarFunction { + + @Override + public Object apply(List<Object> args, Context context) throws ParseException { + if(args.size() < 2) { + return null; + } + Object latObj = args.get(0); + Object longObj = args.get(1); + if(latObj == null || longObj == null) { + return null; + } + Double latitude = ConversionUtils.convert(latObj, Double.class); + Double longitude = ConversionUtils.convert(longObj, Double.class); + int charPrecision = 12; + if(args.size() > 2) { + // Guard against a null conversion result so unboxing into int cannot throw a NullPointerException. + Integer precisionArg = ConversionUtils.convert(args.get(2), Integer.class); + if(precisionArg == null) { + return null; + } + charPrecision = precisionArg; + } + Optional<String> ret = GeoHashUtil.INSTANCE.computeHash(latitude, longitude, charPrecision); + return ret.orElse(null); + } + + @Override + public void initialize(Context context) { + + } + + @Override + public boolean isInitialized() { + return true; + } + } + + @Stellar(name="FROM_LOC" + ,namespace="GEOHASH" + ,description="Compute [geohash](https://en.wikipedia.org/wiki/Geohash) given a geo enrichment location" + ,params = { + "map - the latitude and longitude in a map (the output of 
GEO_GET)", + "character_precision? - The number of characters to use in the hash. Default is 12" + } + ,returns = "A [geohash](https://en.wikipedia.org/wiki/Geohash) of the location" + ) + public static class FromLoc implements StellarFunction { + + @Override + public Object apply(List<Object> args, Context context) throws ParseException { + if(args.size() < 1) { + return null; + } + Map<String, String> map = (Map<String, String>) args.get(0); + if(map == null) { + return null; + } + int charPrecision = 12; + if(args.size() > 1) { + // Guard against a null conversion result so unboxing into int cannot throw a NullPointerException. + Integer precisionArg = ConversionUtils.convert(args.get(1), Integer.class); + if(precisionArg == null) { + return null; + } + charPrecision = precisionArg; + } + Optional<String> ret = GeoHashUtil.INSTANCE.computeHash(map, charPrecision); + return ret.orElse(null); + } + + @Override + public void initialize(Context context) { + + } + + @Override + public boolean isInitialized() { + return true; + } + } + + + @Stellar(name="DIST" + ,namespace="GEOHASH" + ,description="Compute the distance between [geohashes](https://en.wikipedia.org/wiki/Geohash)" + ,params = { + "hash1 - The first location as a geohash", + "hash2 - The second location as a geohash", + "strategy? - The great circle distance strategy to use. One of [HAVERSINE](https://en.wikipedia.org/wiki/Haversine_formula), [LAW_OF_COSINES](https://en.wikipedia.org/wiki/Law_of_cosines#Using_the_distance_formula), or [VICENTY](https://en.wikipedia.org/wiki/Vincenty%27s_formulae). Haversine is default." 
+ } + ,returns = "The distance in kilometers between the hashes" + ) + public static class Dist implements StellarFunction { + + @Override + public Object apply(List<Object> args, Context context) throws ParseException { + if(args.size() < 2) { + return null; + } + String hash1 = (String)args.get(0); + if(hash1 == null) { + return null; + } + Optional<WGS84Point> pt1 = GeoHashUtil.INSTANCE.toPoint(hash1); + String hash2 = (String)args.get(1); + if(hash2 == null) { + return null; + } + Optional<WGS84Point> pt2 = GeoHashUtil.INSTANCE.toPoint(hash2); + DistanceStrategy strat = DistanceStrategies.HAVERSINE; + if(args.size() > 2) { + strat = DistanceStrategies.valueOf((String) args.get(2)); + } + if(pt1.isPresent() && pt2.isPresent()) { + return GeoHashUtil.INSTANCE.distance(pt1.get(), pt2.get(), strat); + } + return Double.NaN; + } + + @Override + public void initialize(Context context) { + + } + + @Override + public boolean isInitialized() { + return true; + } + } + + @Stellar(name="MAX_DIST" + ,namespace="GEOHASH" + ,description="Compute the maximum distance among a list of [geohashes](https://en.wikipedia.org/wiki/Geohash)" + ,params = { + "hashes - A collection of [geohashes](https://en.wikipedia.org/wiki/Geohash)", + "strategy? - The great circle distance strategy to use. One of [HAVERSINE](https://en.wikipedia.org/wiki/Haversine_formula), [LAW_OF_COSINES](https://en.wikipedia.org/wiki/Law_of_cosines#Using_the_distance_formula), or [VICENTY](https://en.wikipedia.org/wiki/Vincenty%27s_formulae). Haversine is default." 
+ } + ,returns = "The maximum distance in kilometers between any two locations" + ) + public static class MaxDist implements StellarFunction { + + @Override + public Object apply(List<Object> args, Context context) throws ParseException { + if(args.size() < 1) { + return null; + } + Iterable<String> hashes = (Iterable<String>)args.get(0); + if(hashes == null) { + return null; + } + DistanceStrategy strat = DistanceStrategies.HAVERSINE; + if(args.size() > 1) { + strat = DistanceStrategies.valueOf((String) args.get(1)); + } + return GeoHashUtil.INSTANCE.maxDistanceHashes(hashes, strat); + } + + @Override + public void initialize(Context context) { + + } + + @Override + public boolean isInitialized() { + return true; + } + } + + @Stellar(name="CENTROID" + ,namespace="GEOHASH" + ,description="Compute the centroid (geographic midpoint or center of gravity) of a set of [geohashes](https://en.wikipedia.org/wiki/Geohash)" + ,params = { + "hashes - A collection of [geohashes](https://en.wikipedia.org/wiki/Geohash) or a map associating geohashes to numeric weights" + ,"character_precision? - The number of characters to use in the hash. 
Default is 12" + } + ,returns = "The geohash of the centroid" + ) + public static class Centroid implements StellarFunction { + + @Override + public Object apply(List<Object> args, Context context) throws ParseException { + if(args.size() < 1) { + return null; + } + Object o1 = args.get(0); + if(o1 == null) { + return null; + } + WGS84Point centroid = null; + if(o1 instanceof Map) { + centroid = GeoHashUtil.INSTANCE.centroidOfWeightedPoints((Map<String, Number>)o1); + } + else if(o1 instanceof Iterable) { + centroid = GeoHashUtil.INSTANCE.centroidOfHashes((Iterable<String>)o1); + } + if(centroid == null) { + return null; + } + Integer precision = 12; + if(args.size() > 1) { + // Convert rather than cast, as FROM_LATLONG and FROM_LOC do, so a non-Integer precision does not throw ClassCastException. + precision = ConversionUtils.convert(args.get(1), Integer.class); + if(precision == null) { + return null; + } + } + return GeoHashUtil.INSTANCE.computeHash(centroid, precision).orElse(null); + } + + @Override + public void initialize(Context context) { + + } + + @Override + public boolean isInitialized() { + return true; + } + } +} http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-enrichment/src/test/java/org/apache/metron/enrichment/stellar/GeoHashFunctionsTest.java ---------------------------------------------------------------------- diff --git a/metron-platform/metron-enrichment/src/test/java/org/apache/metron/enrichment/stellar/GeoHashFunctionsTest.java b/metron-platform/metron-enrichment/src/test/java/org/apache/metron/enrichment/stellar/GeoHashFunctionsTest.java new file mode 100644 index 0000000..f1a0ec4 --- /dev/null +++ b/metron-platform/metron-enrichment/src/test/java/org/apache/metron/enrichment/stellar/GeoHashFunctionsTest.java @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.metron.enrichment.stellar; + +import ch.hsr.geohash.WGS84Point; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.metron.stellar.common.utils.StellarProcessorUtils; +import org.junit.Assert; +import org.junit.Test; + +import java.util.*; + +/** + * Tests for the GEOHASH_* Stellar functions, run as Stellar expressions via StellarProcessorUtils. + * The reference hashes below are computed with GEOHASH_FROM_LATLONG at its default precision. + */ +public class GeoHashFunctionsTest { + public static WGS84Point empireStatePoint = new WGS84Point(40.748570, -73.985752); + public static WGS84Point mosconeCenterPoint = new WGS84Point(37.782891, -122.404166); + public static WGS84Point jutlandPoint = new WGS84Point(57.64911, 10.40740); + public static String explicitJutlandHash = "u4pruydqmvpb"; + String empireStateHash = (String)StellarProcessorUtils.run("GEOHASH_FROM_LATLONG(lat, long)" + , ImmutableMap.of("lat", empireStatePoint.getLatitude() + ,"long",empireStatePoint.getLongitude() + ) + ); + String mosconeCenterHash = (String)StellarProcessorUtils.run("GEOHASH_FROM_LATLONG(lat, long)" + , ImmutableMap.of("lat", mosconeCenterPoint.getLatitude() + ,"long",mosconeCenterPoint.getLongitude() + ) + ); + String jutlandHash = (String)StellarProcessorUtils.run("GEOHASH_FROM_LATLONG(lat, long)" + , ImmutableMap.of("lat", jutlandPoint.getLatitude() + ,"long",jutlandPoint.getLongitude() + ) + ); + + @Test + public void testToLatLong_happypath() throws Exception { + Map<String, 
Object> latLong = (Map<String, Object>)StellarProcessorUtils.run("GEOHASH_TO_LATLONG(hash)" + , ImmutableMap.of("hash", explicitJutlandHash ) ); + Assert.assertEquals(jutlandPoint.getLatitude(), (double)latLong.get("latitude"), 1e-3); + Assert.assertEquals(jutlandPoint.getLongitude(), (double)latLong.get("longitude"), 1e-3); + } + + @Test + public void testToLatLong_degenerate() throws Exception { + { + Map<String, Object> latLong = (Map<String, Object>) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(hash)" + , ImmutableMap.of("hash", "u")); + Assert.assertFalse(Double.isNaN((double) latLong.get("latitude"))); + Assert.assertFalse(Double.isNaN((double) latLong.get("longitude"))); + } + { + Map<String, Object> latLong = (Map<String, Object>) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(hash)" + , ImmutableMap.of("hash", "")); + Assert.assertEquals(0d, (double)latLong.get("latitude"), 1e-3); + Assert.assertEquals(0d, (double)latLong.get("longitude"), 1e-3); + } + { + Map<String, Object> latLong = (Map<String, Object>) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(null)" + , new HashMap<>()); + Assert.assertNull(latLong); + } + } + + @Test + public void testHash_fromlatlong() throws Exception { + Assert.assertEquals("u4pruydqmv", StellarProcessorUtils.run("GEOHASH_FROM_LATLONG(lat, long, 10)" + , ImmutableMap.of("lat", jutlandPoint.getLatitude() + ,"long",jutlandPoint.getLongitude() + ) + ) + ); + + Assert.assertEquals("u4pruydqmvpb", StellarProcessorUtils.run("GEOHASH_FROM_LATLONG(lat, long)" + , ImmutableMap.of("lat", jutlandPoint.getLatitude() + ,"long",jutlandPoint.getLongitude() + ) + ) + ); + Assert.assertEquals("u4pruydqmv".substring(0, 6), StellarProcessorUtils.run("GEOHASH_FROM_LATLONG(lat, long, 6)" + , ImmutableMap.of("lat", jutlandPoint.getLatitude() + ,"long",jutlandPoint.getLongitude() + ) + ) + ); + Assert.assertNull(StellarProcessorUtils.run("GEOHASH_FROM_LATLONG(lat)" + , ImmutableMap.of("lat", jutlandPoint.getLatitude() + ) + ) + ); + 
Assert.assertNull(StellarProcessorUtils.run("GEOHASH_FROM_LATLONG(lat, long, 10)" + , ImmutableMap.of("lat", "blah" + ,"long",jutlandPoint.getLongitude() + ) + ) + ); + } + + @Test + public void testHash_fromLocation() throws Exception { + Map<String, String> loc = ImmutableMap.of( "latitude", "" + jutlandPoint.getLatitude() + , "longitude","" + jutlandPoint.getLongitude() + ); + Assert.assertEquals("u4pruydqmv", StellarProcessorUtils.run("GEOHASH_FROM_LOC(loc, 10)" + , ImmutableMap.of("loc", loc + ) + ) + ); + + Assert.assertEquals("u4pruydqmv".substring(0, 6), StellarProcessorUtils.run("GEOHASH_FROM_LOC(loc, 6)" + , ImmutableMap.of("loc", loc + ) + ) + ); + + Assert.assertEquals("u4pruydqmvpb", StellarProcessorUtils.run("GEOHASH_FROM_LOC(loc)" + , ImmutableMap.of("loc", loc + ) + ) + ); + Assert.assertNull(StellarProcessorUtils.run("GEOHASH_FROM_LOC(loc)" + , ImmutableMap.of("loc", ImmutableMap.of( "latitude", "57.64911" )) + ) + ); + Assert.assertNull(StellarProcessorUtils.run("GEOHASH_FROM_LOC(loc, 10)" + , ImmutableMap.of("loc", ImmutableMap.of( "latitude", "blah" + , "longitude","10.40740" + ) + ) + + ) + ); + } + + @Test + public void testDistanceHaversine() throws Exception { + testDistance(Optional.empty()); + testDistance(Optional.of("HAVERSINE")); + } + + @Test + public void testDistanceLawOfCosines() throws Exception { + testDistance(Optional.of("LAW_OF_COSINES")); + } + + @Test + public void testDistanceLawOfVicenty() throws Exception { + testDistance(Optional.of("VICENTY")); + } + + @Test + public void testMaxDistance_happyPath() throws Exception { + Double maxDistance = (double) StellarProcessorUtils.run("GEOHASH_MAX_DIST([empireState, mosconeCenter, jutland])" + , ImmutableMap.of("empireState", empireStateHash + , "mosconeCenter", mosconeCenterHash + , "jutland", jutlandHash + ) + ); + double expectedDistance = 8528; + Assert.assertEquals(expectedDistance, maxDistance, 1d); + } + + @Test + public void testMaxDistance_differentOrder() throws 
Exception { + Double maxDistance = (double) StellarProcessorUtils.run("GEOHASH_MAX_DIST([jutland, mosconeCenter, empireState])" + , ImmutableMap.of("empireState", empireStateHash + , "mosconeCenter", mosconeCenterHash + , "jutland", jutlandHash + ) + ); + double expectedDistance = 8528; + Assert.assertEquals(expectedDistance, maxDistance, 1d); + } + + @Test + public void testMaxDistance_withNulls() throws Exception { + Double maxDistance = (double) StellarProcessorUtils.run("GEOHASH_MAX_DIST([jutland, mosconeCenter, empireState, null])" + , ImmutableMap.of("empireState", empireStateHash + , "mosconeCenter", mosconeCenterHash + , "jutland", jutlandHash + ) + ); + double expectedDistance = 8528; + Assert.assertEquals(expectedDistance, maxDistance, 1d); + } + @Test + public void testMaxDistance_allSame() throws Exception { + Double maxDistance = (double) StellarProcessorUtils.run("GEOHASH_MAX_DIST([jutland, jutland, jutland])" + , ImmutableMap.of( "jutland", jutlandHash ) + ); + Assert.assertEquals(0, maxDistance, 1e-6d); + } + + @Test + public void testMaxDistance_emptyList() throws Exception { + Double maxDistance = (double) StellarProcessorUtils.run("GEOHASH_MAX_DIST([])" , new HashMap<>() ); + Assert.assertTrue(Double.isNaN(maxDistance)); + } + + @Test + public void testMaxDistance_nullList() throws Exception { + Double maxDistance = (Double) StellarProcessorUtils.run("GEOHASH_MAX_DIST(null)" , new HashMap<>() ); + Assert.assertNull(maxDistance); + } + + @Test + public void testMaxDistance_invalidList() throws Exception { + Double maxDistance = (Double) StellarProcessorUtils.run("GEOHASH_MAX_DIST()" , new HashMap<>() ); + Assert.assertNull(maxDistance); + } + + /** + * Checks GEOHASH_DIST between the Empire State and Moscone Center hashes in both argument orders, + * using the given strategy name, or no strategy argument when the Optional is empty. + */ + public void testDistance(Optional<String> method) throws Exception { + double expectedDistance = 4128; //in kilometers + Map<String, Object> vars = ImmutableMap.of("empireState", empireStateHash, "mosconeCenter", mosconeCenterHash); + //ensure that d(x, y) == d(y, x) and that both are the same as the 
expected (up to 1 km accuracy) + { + String stellarStatement = getDistStellarStatement(ImmutableList.of("mosconeCenter", "empireState"), method); + Assert.assertEquals(expectedDistance, (double) StellarProcessorUtils.run(stellarStatement , vars ), 1D ); + } + { + String stellarStatement = getDistStellarStatement(ImmutableList.of("empireState", "mosconeCenter"), method); + Assert.assertEquals(expectedDistance, (double) StellarProcessorUtils.run(stellarStatement , vars ), 1D ); + } + } + + /** + * Builds a GEOHASH_DIST(...) expression from the variable names, appending the + * single-quoted strategy name as a final argument when one is present. + */ + private static String getDistStellarStatement(List<String> hashVariables, Optional<String> method) { + if(method.isPresent()) { + List<String> vars = new ArrayList<>(); + vars.addAll(hashVariables); + vars.add("\'" + method.get() + "\'"); + return "GEOHASH_DIST(" + Joiner.on(",").skipNulls().join(vars) + ")"; + } + else { + return "GEOHASH_DIST(" + Joiner.on(",").skipNulls().join(hashVariables) + ")"; + } + } + + @Test + public void testCentroid_List() throws Exception { + //happy path + { + double expectedLong = -98.740087 //calculated via http://www.geomidpoint.com/ using the center of gravity or geographic midpoint. 
+ , expectedLat = 41.86921 + ; + Map<String, Double> centroid = (Map) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(GEOHASH_CENTROID([empireState, mosconeCenter]))" + , ImmutableMap.of("empireState", empireStateHash, "mosconeCenter", mosconeCenterHash) + ); + Assert.assertEquals(expectedLong, centroid.get("longitude"), 1e-3); + Assert.assertEquals(expectedLat, centroid.get("latitude"), 1e-3); + } + //same point + { + double expectedLong = empireStatePoint.getLongitude() + , expectedLat = empireStatePoint.getLatitude() + ; + Map<String, Double> centroid = (Map) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(GEOHASH_CENTROID([empireState, empireState]))" + , ImmutableMap.of("empireState", empireStateHash) + ); + Assert.assertEquals(expectedLong, centroid.get("longitude"), 1e-3); + Assert.assertEquals(expectedLat, centroid.get("latitude"), 1e-3); + } + //one point + { + double expectedLong = empireStatePoint.getLongitude() + , expectedLat = empireStatePoint.getLatitude() + ; + Map<String, Double> centroid = (Map) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(GEOHASH_CENTROID([empireState]))" + , ImmutableMap.of("empireState", empireStateHash) + ); + Assert.assertEquals(expectedLong, centroid.get("longitude"), 1e-3); + Assert.assertEquals(expectedLat, centroid.get("latitude"), 1e-3); + } + //no points + { + Map<String, Double> centroid = (Map) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(GEOHASH_CENTROID([]))" + , new HashMap<>() + ); + Assert.assertNull(centroid); + } + } + + @Test + public void testCentroid_weighted() throws Exception { + //happy path + { + double expectedLong = -98.740087 //calculated via http://www.geomidpoint.com/ using the center of gravity or geographic midpoint. 
+ , expectedLat = 41.86921 + ; + for(int weight = 1;weight < 10;++weight) { + Map<Object, Integer> weightedPoints = ImmutableMap.of(empireStateHash, weight, mosconeCenterHash, weight); + Map<String, Double> centroid = (Map) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(GEOHASH_CENTROID(weightedPoints))" + , ImmutableMap.of("weightedPoints", weightedPoints) + ); + Assert.assertEquals(expectedLong, centroid.get("longitude"), 1e-3); + Assert.assertEquals(expectedLat, centroid.get("latitude"), 1e-3); + } + } + //same point + { + double expectedLong = empireStatePoint.getLongitude() + , expectedLat = empireStatePoint.getLatitude() + ; + for(int weight = 1;weight < 10;++weight) { + Map<Object, Integer> weightedPoints = ImmutableMap.of(empireStateHash, weight); + Map<String, Double> centroid = (Map) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(GEOHASH_CENTROID(weightedPoints))" + , ImmutableMap.of("weightedPoints", weightedPoints) + ); + Assert.assertEquals(expectedLong, centroid.get("longitude"), 1e-3); + Assert.assertEquals(expectedLat, centroid.get("latitude"), 1e-3); + } + } + //no points + { + Map<Object, Integer> weightedPoints = new HashMap<>(); + Map<String, Double> centroid = (Map) StellarProcessorUtils.run("GEOHASH_TO_LATLONG(GEOHASH_CENTROID(weightedPoints))" + , ImmutableMap.of("weightedPoints", weightedPoints) + ); + Assert.assertNull(centroid); + } + } +} http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-hbase-client/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-hbase-client/pom.xml b/metron-platform/metron-hbase-client/pom.xml index 5dd6127..1237be7 100644 --- a/metron-platform/metron-hbase-client/pom.xml +++ b/metron-platform/metron-hbase-client/pom.xml @@ -80,6 +80,16 @@ <goal>shade</goal> </goals> <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + 
<exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>org.apache.commons.logging</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-indexing/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-indexing/pom.xml b/metron-platform/metron-indexing/pom.xml index c64c374..7d07665 100644 --- a/metron-platform/metron-indexing/pom.xml +++ b/metron-platform/metron-indexing/pom.xml @@ -222,6 +222,16 @@ <configuration> <shadedArtifactAttached>true</shadedArtifactAttached> <shadedClassifierName>uber</shadedClassifierName> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-management/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-management/pom.xml b/metron-platform/metron-management/pom.xml index 4117d69..a5cae38 100644 --- a/metron-platform/metron-management/pom.xml +++ b/metron-platform/metron-management/pom.xml @@ -205,6 +205,16 @@ <goal>shade</goal> </goals> <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-parsers/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-parsers/pom.xml b/metron-platform/metron-parsers/pom.xml index b7c21ff..85c6218 100644 --- 
a/metron-platform/metron-parsers/pom.xml +++ b/metron-platform/metron-parsers/pom.xml @@ -266,6 +266,16 @@ <configuration> <shadedArtifactAttached>true</shadedArtifactAttached> <shadedClassifierName>uber</shadedClassifierName> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.fasterxml.jackson</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-pcap-backend/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-pcap-backend/pom.xml b/metron-platform/metron-pcap-backend/pom.xml index 388b1e0..5878873 100644 --- a/metron-platform/metron-pcap-backend/pom.xml +++ b/metron-platform/metron-pcap-backend/pom.xml @@ -221,6 +221,16 @@ <goal>shade</goal> </goals> <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-solr/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-solr/pom.xml b/metron-platform/metron-solr/pom.xml index be1fe33..97132c4 100644 --- a/metron-platform/metron-solr/pom.xml +++ b/metron-platform/metron-solr/pom.xml @@ -232,6 +232,16 @@ <configuration> <shadedArtifactAttached>true</shadedArtifactAttached> <shadedClassifierName>uber</shadedClassifierName> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <artifactSet> <excludes> 
<exclude>storm:storm-core:*</exclude> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-platform/metron-writer/pom.xml ---------------------------------------------------------------------- diff --git a/metron-platform/metron-writer/pom.xml b/metron-platform/metron-writer/pom.xml index 7d3152c..de6b3b8 100644 --- a/metron-platform/metron-writer/pom.xml +++ b/metron-platform/metron-writer/pom.xml @@ -238,6 +238,16 @@ <goal>shade</goal> </goals> <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-stellar/stellar-common/README.md ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/README.md b/metron-stellar/stellar-common/README.md index 4dc7d8d..340a7ae 100644 --- a/metron-stellar/stellar-common/README.md +++ b/metron-stellar/stellar-common/README.md @@ -135,6 +135,12 @@ In the core language functions, we support basic functional programming primitiv | [ `FUZZY_SCORE`](#fuzzy_score) | | [ `FORMAT`](#format) | | [ `GEO_GET`](#geo_get) | +| [ `GEOHASH_CENTROID`](#geohash_centroid) | +| [ `GEOHASH_DIST`](#geohash_dist) | +| [ `GEOHASH_FROM_LATLONG`](#geohash_from_latlong) | +| [ `GEOHASH_FROM_LOC`](#geohash_from_loc) | +| [ `GEOHASH_MAX_DIST`](#geohash_max_dist) | +| [ `GEOHASH_TO_LATLONG`](#geohash_to_latlong) | | [ `GET`](#get) | | [ `GET_FIRST`](#get_first) | | [ `GET_LAST`](#get_last) | @@ -443,6 +449,50 @@ In the core language functions, we support basic functional programming primitiv * fields - Optional list of GeoIP fields to grab. 
Options are locID, country, city, postalCode, dmaCode, latitude, longitude, location_point * Returns: If a Single field is requested a string of the field, If multiple fields a map of string of the fields, and null otherwise +### `GEOHASH_CENTROID` + * Description: Compute the centroid (geographic midpoint or center of gravity) of a set of [geohashes](https://en.wikipedia.org/wiki/Geohash) + * Input: + * hashes - A collection of [geohashes](https://en.wikipedia.org/wiki/Geohash) or a map associating geohashes to numeric weights + * character_precision? - The number of characters to use in the hash. Default is 12 + * Returns: The geohash of the centroid + +### `GEOHASH_DIST` + * Description: Compute the distance between [geohashes](https://en.wikipedia.org/wiki/Geohash) + * Input: + * hash1 - The first point as a geohash + * hash2 - The second point as a geohash + * strategy? - The great circle distance strategy to use. One of [HAVERSINE](https://en.wikipedia.org/wiki/Haversine_formula), [LAW_OF_COSINES](https://en.wikipedia.org/wiki/Law_of_cosines#Using_the_distance_formula), or [VICENTY](https://en.wikipedia.org/wiki/Vincenty%27s_formulae). Haversine is default. + * Returns: The distance in kilometers between the hashes. + +### `GEOHASH_FROM_LATLONG` + * Description: Compute [geohash](https://en.wikipedia.org/wiki/Geohash) given a lat/long + * Input: + * latitude - The latitude + * longitude - The longitude + * character_precision? - The number of characters to use in the hash. Default is 12 + * Returns: A [geohash](https://en.wikipedia.org/wiki/Geohash) of the lat/long + +### `GEOHASH_FROM_LOC` + * Description: Compute [geohash](https://en.wikipedia.org/wiki/Geohash) given a geo enrichment location + * Input: + * map - the latitude and longitude in a map (the output of [GEO_GET](#geo_get) ) + * longitude - The longitude + * character_precision? - The number of characters to use in the hash. 
Default is `12` + * Returns: A [geohash](https://en.wikipedia.org/wiki/Geohash) of the location + +### `GEOHASH_MAX_DIST` + * Description: Compute the maximum distance among a list of [geohashes](https://en.wikipedia.org/wiki/Geohash) + * Input: + * hashes - A set of [geohashes](https://en.wikipedia.org/wiki/Geohash) + * strategy? - The great circle distance strategy to use. One of [HAVERSINE](https://en.wikipedia.org/wiki/Haversine_formula), [LAW_OF_COSINES](https://en.wikipedia.org/wiki/Law_of_cosines#Using_the_distance_formula), or [VICENTY](https://en.wikipedia.org/wiki/Vincenty%27s_formulae). Haversine is default. + * Returns: The maximum distance in kilometers between any two locations + +### `GEOHASH_TO_LATLONG` + * Description: Compute the lat/long of a given [geohash](https://en.wikipedia.org/wiki/Geohash) + * Input: + * hash - The [geohash](https://en.wikipedia.org/wiki/Geohash) + * Returns: A map containing the latitude and longitude of the hash (keys "latitude" and "longitude") + ### `GET` * Description: Returns the i'th element of the list * Input: http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/metron-stellar/stellar-common/pom.xml ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/pom.xml b/metron-stellar/stellar-common/pom.xml index 5945bbd..9ec29b8 100644 --- a/metron-stellar/stellar-common/pom.xml +++ b/metron-stellar/stellar-common/pom.xml @@ -257,6 +257,16 @@ <configuration> <shadedArtifactAttached>true</shadedArtifactAttached> <shadedClassifierName>uber</shadedClassifierName> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> <relocations> <relocation> <pattern>com.fasterxml.jackson</pattern> http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/use-cases/README.md 
---------------------------------------------------------------------- diff --git a/use-cases/README.md b/use-cases/README.md new file mode 100644 index 0000000..02be32d --- /dev/null +++ b/use-cases/README.md @@ -0,0 +1,4 @@ +# Worked Examples + +The following are worked examples of use-cases that showcase some (or +many) component(s) of Metron. http://git-wip-us.apache.org/repos/asf/metron/blob/dd711819/use-cases/geographic_login_outliers/README.md ---------------------------------------------------------------------- diff --git a/use-cases/geographic_login_outliers/README.md b/use-cases/geographic_login_outliers/README.md new file mode 100644 index 0000000..99e9a5b --- /dev/null +++ b/use-cases/geographic_login_outliers/README.md @@ -0,0 +1,267 @@ +# Problem Statement + +One way to find anomalous behavior in a network is by inspecting user +login behavior. In particular, if a user is logging in via vastly +differing geographic locations in a short period of time, this may be +evidence of malicious behavior. + +More formally, we can encode this potentially malicious event in terms +of how far a login is from the geographic centroid of the user's +historic logins, as compared to all users. For instance, if we track all users and the +median distance from the central geographic location of all of their +logins for the last 2 hours is 3 km and the standard deviation is 1 km, +if we see a user logging in 1700 km from the central geographic location of +their logins for the last 2 hours, then they MAY be exhibiting a +deviation that we want to monitor since it would be hard to travel that +distance in 4 hours. On the other hand, the user may have +just used a VPN or proxy. Ultimately, this sort of analytic must be +considered only one piece of evidence in addition to many others before +we want to indicate an alert. 
+ +# Demonstration Design +For the purposes of demonstration, we will construct synthetic data +whereby 2 users are logging into a system rather quickly (once per +second) from various hosts. Each user's locations share the same first +2 octets, but will choose the last 2 randomly. We will then inject a +data point indicating `user1` is logging in via a Russian IP address. + +## Preliminaries +We assume that the following environment variables are set: +* `METRON_HOME` - the home directory for metron +* `ZOOKEEPER` - The zookeeper quorum (comma separated with port specified: e.g. `node1:2181` for full-dev) +* `BROKERLIST` - The Kafka broker list (comma separated with port specified: e.g. `node1:6667` for full-dev) +* `ES_HOST` - The elasticsearch master (and port) e.g. `node1:9200` for full-dev. + +Also, this does not assume that you are using a kerberized cluster. If you are, then the parser start command will adjust slightly to include the security protocol. + +Before editing configurations, be sure to pull the configs from zookeeper locally via +``` +$METRON_HOME/bin/zk_load_configs.sh --mode PULL -z $ZOOKEEPER -o $METRON_HOME/config/zookeeper/ -f +``` + +## Configure the Profiler +First, we'll configure the profiler to emit a profile every 1 minute: +* In Ambari, set the profiler period duration to `1` minute via the Profiler config section. +* Adjust `$METRON_HOME/config/zookeeper/global.json` to adjust the capture duration: +``` + "profiler.client.period.duration" : "1", + "profiler.client.period.duration.units" : "MINUTES" +``` + +## Create the Data Generator +We want to create a new sensor for our synthetic data called `auth`. To +feed it, we need a synthetic data generator. In particular, we want a +process which will feed authentication events per second for a set of +users where the IPs are randomly chosen, but each user's login ip +addresses share the same first 2 octets. 
+ +Edit `~/gen_data.py` and paste the following into it: +``` +#!/usr/bin/python + +import random +import sys +import time + +domains = { 'user1' : '173.90', 'user2' : '156.33' } + +def get_ip(base): + return base + '.' + str(random.randint(1,255)) + '.' + str(random.randint(1, 255)) + +def main(): + freq_s = 1 + while True: + user='user' + str(random.randint(1,len(domains))) + epoch_time = int(time.time()) + ip=get_ip(domains[user]) + print user + ',' + ip + ',' + str(epoch_time) + sys.stdout.flush() + time.sleep(freq_s) + +if __name__ == '__main__': + main() +``` + +## Create the `auth` Parser + +The message format for our simple synthetic data is a CSV with: +* username +* login ip address +* timestamp + +We will need to parse this via our `CSVParser` and add the geohash of the login ip address. + +* To create this parser, edit `$METRON_HOME/config/zookeeper/parsers/auth.json` and paste the following: +``` +{ + "parserClassName" : "org.apache.metron.parsers.csv.CSVParser" + ,"sensorTopic" : "auth" + ,"parserConfig" : { + "columns" : { + "user" : 0, + "ip" : 1, + "timestamp" : 2 + } + } + ,"fieldTransformations" : [ + { + "transformation" : "STELLAR" + ,"output" : [ "hash" ] + ,"config" : { + "hash" : "GEOHASH_FROM_LOC(GEO_GET(ip))" + } + } + ] +} +``` +* Create the kafka topic via: +``` +/usr/hdp/current/kafka-broker/bin/kafka-topics.sh --zookeeper $ZOOKEEPER --create --topic auth --partitions 1 --replication-factor 1 +``` + +## Create the Profiles for Enrichment + +We will need to track 2 profiles to accomplish this task: +* `locations_by_user` - The geohashes of the locations the user has logged in from. This is a multiset of geohashes per user. Note that the multiset in this case is effectively a map of geohashes to occurrence counts. +* `geo_distribution_from_centroid` - The statistical distribution of the distance between a login location and the geographic centroid of the user's previous logins from the last 2 minutes. 
Note, in a real installation this would be a larger temporal lookback. + +We can represent these in the `$METRON_HOME/config/zookeeper/profiler.json` via the following: +``` +{ + "profiles": [ + { + "profile": "geo_distribution_from_centroid", + "foreach": "'global'", + "onlyif": "exists(geo_distance) && geo_distance != null", + "init" : { + "s": "STATS_INIT()" + }, + "update": { + "s": "STATS_ADD(s, geo_distance)" + }, + "result": "s" + }, + { + "profile": "locations_by_user", + "foreach": "user", + "onlyif": "exists(hash) && hash != null && LENGTH(hash) > 0", + "init" : { + "s": "MULTISET_INIT()" + }, + "update": { + "s": "MULTISET_ADD(s, hash)" + }, + "result": "s" + } + ] +} +``` + +## Enrich authentication Events + +We will need to enrich the authentication records in a couple of ways to use in the threat triage section as well as the profiles: +* `geo_distance`: representing the distance between the current geohash and the geographic centroid for the last 2 minutes. +* `geo_centroid`: representing the geographic centroid for the last 2 minutes + +Beyond that, we will need to determine if the authentication event is a geographic outlier by computing the following fields: +* `dist_median` : representing the median distance between a user's login location and the geographic centroid for the last 2 minutes (essentially the median of the `geo_distance` values across all users). +* `dist_sd` : representing the standard deviation of the distance between a user's login location and the geographic centroid for the last 2 minutes (essentially the standard deviation of the `geo_distance` values across all users). +* `geo_outlier` : whether `geo_distance` is more than 5 standard deviations from the median across all users. + +We also want to set up a triage rule associating a score and setting an alert if `geo_outlier` is true. 
In reality, this would be more complex as this metric is at best circumstantial and would need supporting evidence, but for simplicity we'll deal with the false positives. + +* Edit `$METRON_HOME/config/zookeeper/enrichments/auth.json` and paste the following: +``` +{ + "enrichment": { + "fieldMap": { + "stellar" : { + "config" : [ + "geo_locations := MULTISET_MERGE( PROFILE_GET( 'locations_by_user', user, PROFILE_FIXED( 2, 'MINUTES')))", + "geo_centroid := GEOHASH_CENTROID(geo_locations)", + "geo_distance := TO_INTEGER(GEOHASH_DIST(geo_centroid, hash))", + "geo_locations := null" + ] + } + } + ,"fieldToTypeMap": { } + }, + "threatIntel": { + "fieldMap": { + "stellar" : { + "config" : [ + "geo_distance_distr:= STATS_MERGE( PROFILE_GET( 'geo_distribution_from_centroid', 'global', PROFILE_FIXED( 2, 'MINUTES')))", + "dist_median := STATS_PERCENTILE(geo_distance_distr, 50.0)", + "dist_sd := STATS_SD(geo_distance_distr)", + "geo_outlier := ABS(dist_median - geo_distance) >= 5*dist_sd", + "is_alert := exists(is_alert) && is_alert", + "is_alert := is_alert || (geo_outlier != null && geo_outlier == true)", + "geo_distance_distr := null" + ] + } + + }, + "fieldToTypeMap": { }, + "triageConfig" : { + "riskLevelRules" : [ + { + "name" : "Geographic Outlier", + "comment" : "Determine if the user's geographic distance from the centroid of the historic logins is an outlier as compared to all users.", + "rule" : "geo_outlier != null && geo_outlier", + "score" : 10, + "reason" : "FORMAT('user %s has a distance (%d) from the centroid of their last login is 5 std deviations (%f) from the median (%f)', user, geo_distance, dist_sd, dist_median)" + } + ], + "aggregator" : "MAX" + } + } +} +``` + +## Execute Demonstration + +From here, we've set up our configuration and can push the configs: +* Push the configs to zookeeper via +``` +$METRON_HOME/bin/zk_load_configs.sh --mode PUSH -z node1:2181 -i $METRON_HOME/config/zookeeper/ +``` +* Start the parser via: +``` 
+$METRON_HOME/bin/start_parser_topology.sh -k $BROKERLIST -z $ZOOKEEPER -s auth +``` +* Push synthetic data into the `auth` topic via +``` +python ~/gen_data.py | /usr/hdp/current/kafka-broker/bin/kafka-console-producer.sh --broker-list node1:6667 --topic auth +``` +* Wait for about `5` minutes and kill the previous command +* Push a synthetic record indicating `user1` has logged in from a Russian IP (`109.252.227.173`): +``` +echo -e "import time\nprint 'user1,109.252.227.173,'+str(int(time.time()))" | python | /usr/hdp/current/kafka-broker/bin/kafka-console-producer.sh --broker-list $BROKERLIST --topic auth +``` +* Execute the following to search elasticsearch for our geographic login outliers: +``` +curl -XPOST "http://$ES_HOST/auth*/_search?pretty" -d ' +{ + "_source" : [ "is_alert", "threat:triage:rules:0:reason", "user", "ip", "geo_distance" ], + "query": { "exists" : { "field" : "threat:triage:rules:0:reason" } } +} +' +``` + +You should see, among a few other false positive results, something like the following: +``` +{ + "_index" : "auth_index_2017.09.07.20", + "_type" : "auth_doc", + "_id" : "f5bdbf76-9d78-48cc-b21d-bc434c96e62e", + "_score" : 1.0, + "_source" : { + "geo_distance" : 7879, + "threat:triage:rules:0:reason" : "user user1 has a distance (7879) from the centroid of their last login is 5 std deviations (334.814719) from the median (128.000000)", + "ip" : "109.252.227.173", + "is_alert" : "true", + "user" : "user1" + } +} +``` +
