[
https://issues.apache.org/jira/browse/METRON-283?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15838511#comment-15838511
]
ASF GitHub Bot commented on METRON-283:
---------------------------------------
Github user nickwallen commented on a diff in the pull request:
https://github.com/apache/incubator-metron/pull/421#discussion_r97868132
--- Diff:
metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/GeoLiteDatabase.java
---
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.metron.enrichment.adapters.geo;
+
+import com.maxmind.db.CHMCache;
+import com.maxmind.geoip2.DatabaseReader;
+import com.maxmind.geoip2.exception.GeoIp2Exception;
+import com.maxmind.geoip2.model.CityResponse;
+import com.maxmind.geoip2.record.City;
+import com.maxmind.geoip2.record.Country;
+import com.maxmind.geoip2.record.Location;
+import com.maxmind.geoip2.record.Postal;
+import org.apache.commons.validator.routines.InetAddressValidator;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.zip.GZIPInputStream;
+
+public enum GeoLiteDatabase {
+ INSTANCE;
+
+ protected static final Logger LOG =
LoggerFactory.getLogger(GeoLiteDatabase.class);
+ public static final String GEO_HDFS_FILE = "geo.hdfs.file";
+ public static final String GEO_HDFS_FILE_DEFAULT =
"/apps/metron/geo/default/GeoLite2-City.mmdb.gz";
+
+ private static ReentrantReadWriteLock lock = new
ReentrantReadWriteLock();
+ private static final Lock readLock = lock.readLock();
+ private static final Lock writeLock = lock.writeLock();
+ private static InetAddressValidator ipvalidator = new
InetAddressValidator();
+ private static volatile String hdfsLoc = GEO_HDFS_FILE_DEFAULT;
+ private static DatabaseReader reader = null;
+
+ /**
+  * Reloads the GeoIP database when no reader has been loaded yet, or when the
+  * HDFS location named in the global config differs from the one last requested.
+  * Only the configured path is compared; the file contents are not inspected.
+  *
+  * @param globalConfig global configuration; may be null, in which case
+  *                     {@link #GEO_HDFS_FILE_DEFAULT} is used. The value stored
+  *                     under {@link #GEO_HDFS_FILE} is expected to be a String.
+  * @throws IllegalStateException propagated from {@link #update(String)} when
+  *                               the database cannot be loaded
+  */
+ public synchronized void updateIfNecessary(Map<String, Object> globalConfig) {
+   LOG.trace("[Metron] Determining if GeoIpDatabase update required");
+   String hdfsFile = GEO_HDFS_FILE_DEFAULT;
+   if (globalConfig != null) {
+     hdfsFile = (String) globalConfig.getOrDefault(GEO_HDFS_FILE, GEO_HDFS_FILE_DEFAULT);
+   }
+   // getOrDefault returns null when the key is explicitly mapped to null.
+   // Normalise null/empty to the default here (update() treats them the same
+   // way) so that hdfsLoc is never null and hdfsLoc.equals(...) below cannot
+   // throw a NullPointerException on a later call.
+   if (hdfsFile == null || hdfsFile.isEmpty()) {
+     hdfsFile = GEO_HDFS_FILE_DEFAULT;
+   }
+
+   // Always update if we don't have a DatabaseReader
+   if (reader == null || !hdfsLoc.equals(hdfsFile)) {
+     hdfsLoc = hdfsFile;
+     update(hdfsFile);
+   } else {
+     LOG.trace("[Metron] Update to GeoIpDatabase unnecessary");
+   }
+ }
+
+ @SuppressWarnings("unchecked")
+ public void update(String hdfsFile) {
+ // If nothing is set (or it's been unset, use the defaults)
+ if (hdfsFile == null || hdfsFile.isEmpty()) {
+ LOG.debug("[Metron] Using default for {}: {}", GEO_HDFS_FILE,
GEO_HDFS_FILE_DEFAULT);
+ hdfsFile = GEO_HDFS_FILE_DEFAULT;
+ }
+
+ FileSystem fs;
+ try {
+ fs = FileSystem.get(new Configuration());
+ } catch (IOException e) {
+ LOG.error("[Metron] Unable to retrieve get HDFS FileSystem");
+ throw new IllegalStateException("[Metron] Unable to get HDFS
FileSystem");
+ }
+
+ try (GZIPInputStream gis = new GZIPInputStream(fs.open(new
Path(hdfsFile)))) {
+ writeLock.lock();
+ LOG.info("[Metron] Update to GeoIP data started with {}", hdfsFile);
+ // InputStream based DatabaseReaders are always in memory.
+ DatabaseReader newReader = new
DatabaseReader.Builder(gis).withCache(new CHMCache()).build();
+ DatabaseReader oldReader = reader;
+ reader = newReader;
+ // If we've never set a reader, don't close the old one
+ if (oldReader != null) {
+ oldReader.close();
+ }
+ LOG.info("[Metron] Finished update to GeoIP data started with {}",
hdfsFile);
+ } catch (IOException e) {
+ LOG.error("[Metron] Unable to open new database file {}", hdfsFile,
e);
+ throw new IllegalStateException("[Metron] Unable to update MaxMind
database");
+ } finally {
+ // Don't unlock if the try failed
+ if (lock.isWriteLocked()) {
+ writeLock.unlock();
+ }
+ }
+ }
+
+ // Optional.empty means that we don't have any geo location in database.
+ // Optional exists, but empty means local IP (valid, but no info will be
in the DB)
+ @SuppressWarnings("unchecked")
+ public Optional<HashMap<String, String>> get(String ip) {
+ // Call get every single time, returns current version. Updates behind
the scenes.
+ LOG.trace("[Metron] Called GeoIpDatabase.get({})", ip);
+ InetAddress addr = null;
+ try {
+ addr = InetAddress.getByName(ip);
+ } catch (UnknownHostException e) {
+ LOG.warn("[Metron] No result found for IP {}", ip, e);
+ return Optional.empty();
+ }
+ if (isLocalAddress(ip, addr)) {
+ return Optional.of(new HashMap());
+ }
+
+ try {
+ readLock.lock();
+ addr = InetAddress.getByName(ip);
+ CityResponse cityResponse = reader.city(addr);
--- End diff --
Would it make sense to constrain the scope of the read lock? Once we have
a `CityResponse` object we don't need the lock any longer.
> Migrate Geo Enrichment outside of MySQL
> ---------------------------------------
>
> Key: METRON-283
> URL: https://issues.apache.org/jira/browse/METRON-283
> Project: Metron
> Issue Type: Improvement
> Reporter: James Sirota
> Assignee: Justin Leet
> Priority: Minor
>
> We need to migrate our enrichment SQL store from MySQL to Phoenix or some
> other SQL-on-HBase library, or alternatively come up with a way to do this
> without using SQL. This way we don't have a dependency on MySQL, and there is
> one less thing that we need to install on our platform.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)