[ https://issues.apache.org/jira/browse/METRON-283?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15838511#comment-15838511 ]
ASF GitHub Bot commented on METRON-283: --------------------------------------- Github user nickwallen commented on a diff in the pull request: https://github.com/apache/incubator-metron/pull/421#discussion_r97868132 --- Diff: metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/GeoLiteDatabase.java --- @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.metron.enrichment.adapters.geo; + +import com.maxmind.db.CHMCache; +import com.maxmind.geoip2.DatabaseReader; +import com.maxmind.geoip2.exception.GeoIp2Exception; +import com.maxmind.geoip2.model.CityResponse; +import com.maxmind.geoip2.record.City; +import com.maxmind.geoip2.record.Country; +import com.maxmind.geoip2.record.Location; +import com.maxmind.geoip2.record.Postal; +import org.apache.commons.validator.routines.InetAddressValidator; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.zip.GZIPInputStream; + +public enum GeoLiteDatabase { + INSTANCE; + + protected static final Logger LOG = LoggerFactory.getLogger(GeoLiteDatabase.class); + public static final String GEO_HDFS_FILE = "geo.hdfs.file"; + public static final String GEO_HDFS_FILE_DEFAULT = "/apps/metron/geo/default/GeoLite2-City.mmdb.gz"; + + private static ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + private static final Lock readLock = lock.readLock(); + private static final Lock writeLock = lock.writeLock(); + private static InetAddressValidator ipvalidator = new InetAddressValidator(); + private static volatile String hdfsLoc = GEO_HDFS_FILE_DEFAULT; + private static DatabaseReader reader = null; + + public synchronized void updateIfNecessary(Map<String, Object> globalConfig) { + // Reload database if necessary (file changes on HDFS) + LOG.trace("[Metron] Determining if GeoIpDatabase update required"); + String hdfsFile = GEO_HDFS_FILE_DEFAULT; + if (globalConfig != null) { + hdfsFile = (String) 
globalConfig.getOrDefault(GEO_HDFS_FILE, GEO_HDFS_FILE_DEFAULT); + } + + // Always update if we don't have a DatabaseReader + if (reader == null || !hdfsLoc.equals(hdfsFile)) { + // Update + hdfsLoc = hdfsFile; + update(hdfsFile); + } else { + LOG.trace("[Metron] Update to GeoIpDatabase unnecessary"); + } + } + + @SuppressWarnings("unchecked") + public void update(String hdfsFile) { + // If nothing is set (or it's been unset, use the defaults) + if (hdfsFile == null || hdfsFile.isEmpty()) { + LOG.debug("[Metron] Using default for {}: {}", GEO_HDFS_FILE, GEO_HDFS_FILE_DEFAULT); + hdfsFile = GEO_HDFS_FILE_DEFAULT; + } + + FileSystem fs; + try { + fs = FileSystem.get(new Configuration()); + } catch (IOException e) { + LOG.error("[Metron] Unable to retrieve get HDFS FileSystem"); + throw new IllegalStateException("[Metron] Unable to get HDFS FileSystem"); + } + + try (GZIPInputStream gis = new GZIPInputStream(fs.open(new Path(hdfsFile)))) { + writeLock.lock(); + LOG.info("[Metron] Update to GeoIP data started with {}", hdfsFile); + // InputStream based DatabaseReaders are always in memory. + DatabaseReader newReader = new DatabaseReader.Builder(gis).withCache(new CHMCache()).build(); + DatabaseReader oldReader = reader; + reader = newReader; + // If we've never set a reader, don't close the old one + if (oldReader != null) { + oldReader.close(); + } + LOG.info("[Metron] Finished update to GeoIP data started with {}", hdfsFile); + } catch (IOException e) { + LOG.error("[Metron] Unable to open new database file {}", hdfsFile, e); + throw new IllegalStateException("[Metron] Unable to update MaxMind database"); + } finally { + // Don't unlock if the try failed + if (lock.isWriteLocked()) { + writeLock.unlock(); + } + } + } + + // Optional.empty means that we don't have any geo location in database. 
+ // Optional exists, but empty means local IP (valid, but no info will be in the DB) + @SuppressWarnings("unchecked") + public Optional<HashMap<String, String>> get(String ip) { + // Call get every single time, returns current version. Updates behind the scenes. + LOG.trace("[Metron] Called GeoIpDatabase.get({})", ip); + InetAddress addr = null; + try { + addr = InetAddress.getByName(ip); + } catch (UnknownHostException e) { + LOG.warn("[Metron] No result found for IP {}", ip, e); + return Optional.empty(); + } + if (isLocalAddress(ip, addr)) { + return Optional.of(new HashMap()); + } + + try { + readLock.lock(); + addr = InetAddress.getByName(ip); + CityResponse cityResponse = reader.city(addr); --- End diff -- Would it make sense to constrain the scope of the read lock? Once we have a `CityResponse` object we don't need the lock any longer. > Migrate Geo Enrichment outside of MySQL > --------------------------------------- > > Key: METRON-283 > URL: https://issues.apache.org/jira/browse/METRON-283 > Project: Metron > Issue Type: Improvement > Reporter: James Sirota > Assignee: Justin Leet > Priority: Minor > > We need to migrate our enrichment SQL store from MySQL to Phoenix or some > other SQL-on-HBase library, or alternatively come up with a way to do this > without using SQL. This way we don't have a dependency on MySQL and there is > one less thing that we need to install on our platform. -- This message was sent by Atlassian JIRA (v6.3.4#6332)