[ 
https://issues.apache.org/jira/browse/METRON-283?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15838511#comment-15838511
 ] 

ASF GitHub Bot commented on METRON-283:
---------------------------------------

Github user nickwallen commented on a diff in the pull request:

    https://github.com/apache/incubator-metron/pull/421#discussion_r97868132
  
    --- Diff: 
metron-platform/metron-enrichment/src/main/java/org/apache/metron/enrichment/adapters/geo/GeoLiteDatabase.java
 ---
    @@ -0,0 +1,184 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.metron.enrichment.adapters.geo;
    +
    +import com.maxmind.db.CHMCache;
    +import com.maxmind.geoip2.DatabaseReader;
    +import com.maxmind.geoip2.exception.GeoIp2Exception;
    +import com.maxmind.geoip2.model.CityResponse;
    +import com.maxmind.geoip2.record.City;
    +import com.maxmind.geoip2.record.Country;
    +import com.maxmind.geoip2.record.Location;
    +import com.maxmind.geoip2.record.Postal;
    +import org.apache.commons.validator.routines.InetAddressValidator;
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import java.io.IOException;
    +import java.net.InetAddress;
    +import java.net.UnknownHostException;
    +import java.util.HashMap;
    +import java.util.Map;
    +import java.util.Optional;
    +import java.util.concurrent.locks.Lock;
    +import java.util.concurrent.locks.ReentrantReadWriteLock;
    +import java.util.zip.GZIPInputStream;
    +
    +public enum GeoLiteDatabase {
    +  INSTANCE;
    +
    +  protected static final Logger LOG = 
LoggerFactory.getLogger(GeoLiteDatabase.class);
    +  public static final String GEO_HDFS_FILE = "geo.hdfs.file";
    +  public static final String GEO_HDFS_FILE_DEFAULT = 
"/apps/metron/geo/default/GeoLite2-City.mmdb.gz";
    +
    +  private static ReentrantReadWriteLock lock = new 
ReentrantReadWriteLock();
    +  private static final Lock readLock = lock.readLock();
    +  private static final Lock writeLock = lock.writeLock();
    +  private static InetAddressValidator ipvalidator = new 
InetAddressValidator();
    +  private static volatile String hdfsLoc = GEO_HDFS_FILE_DEFAULT;
    +  private static DatabaseReader reader = null;
    +
    +  public synchronized void updateIfNecessary(Map<String, Object> 
globalConfig) {
    +    // Reload database if necessary (file changes on HDFS)
    +    LOG.trace("[Metron] Determining if GeoIpDatabase update required");
    +    String hdfsFile = GEO_HDFS_FILE_DEFAULT;
    +    if (globalConfig != null) {
    +      hdfsFile = (String) globalConfig.getOrDefault(GEO_HDFS_FILE, 
GEO_HDFS_FILE_DEFAULT);
    +    }
    +
    +    // Always update if we don't have a DatabaseReader
    +    if (reader == null || !hdfsLoc.equals(hdfsFile)) {
    +      // Update
    +      hdfsLoc = hdfsFile;
    +      update(hdfsFile);
    +    } else {
    +      LOG.trace("[Metron] Update to GeoIpDatabase unnecessary");
    +    }
    +  }
    +
    +  @SuppressWarnings("unchecked")
    +  public void update(String hdfsFile) {
    +    // If nothing is set (or it's been unset, use the defaults)
    +    if (hdfsFile == null || hdfsFile.isEmpty()) {
    +      LOG.debug("[Metron] Using default for {}: {}", GEO_HDFS_FILE, 
GEO_HDFS_FILE_DEFAULT);
    +      hdfsFile = GEO_HDFS_FILE_DEFAULT;
    +    }
    +
    +    FileSystem fs;
    +    try {
    +      fs = FileSystem.get(new Configuration());
    +    } catch (IOException e) {
    +      LOG.error("[Metron] Unable to retrieve get HDFS FileSystem");
    +      throw new IllegalStateException("[Metron] Unable to get HDFS 
FileSystem");
    +    }
    +
    +    try (GZIPInputStream gis = new GZIPInputStream(fs.open(new 
Path(hdfsFile)))) {
    +      writeLock.lock();
    +      LOG.info("[Metron] Update to GeoIP data started with {}", hdfsFile);
    +      // InputStream based DatabaseReaders are always in memory.
    +      DatabaseReader newReader = new 
DatabaseReader.Builder(gis).withCache(new CHMCache()).build();
    +      DatabaseReader oldReader = reader;
    +      reader = newReader;
    +      // If we've never set a reader, don't close the old one
    +      if (oldReader != null) {
    +        oldReader.close();
    +      }
    +      LOG.info("[Metron] Finished update to GeoIP data started with {}", 
hdfsFile);
    +    } catch (IOException e) {
    +      LOG.error("[Metron] Unable to open new database file {}", hdfsFile, 
e);
    +      throw new IllegalStateException("[Metron] Unable to update MaxMind 
database");
    +    } finally {
    +      // Don't unlock if the try failed
    +      if (lock.isWriteLocked()) {
    +        writeLock.unlock();
    +      }
    +    }
    +  }
    +
    +  // Optional.empty means that we don't have any geo location in database.
    +  // Optional exists, but empty means local IP (valid, but no info will be 
in the DB)
    +  @SuppressWarnings("unchecked")
    +  public Optional<HashMap<String, String>> get(String ip) {
    +    // Call get every single time, returns current version. Updates behind 
the scenes.
    +    LOG.trace("[Metron] Called GeoIpDatabase.get({})", ip);
    +    InetAddress addr = null;
    +    try {
    +      addr = InetAddress.getByName(ip);
    +    } catch (UnknownHostException e) {
    +      LOG.warn("[Metron] No result found for IP {}", ip, e);
    +      return Optional.empty();
    +    }
    +    if (isLocalAddress(ip, addr)) {
    +      return Optional.of(new HashMap());
    +    }
    +
    +    try {
    +      readLock.lock();
    +      addr = InetAddress.getByName(ip);
    +      CityResponse cityResponse = reader.city(addr);
    --- End diff --
    
    Would it make sense to constrain the scope of the read lock?  Once we have 
a `CityResponse` object we don't need the lock any longer.


> Migrate Geo Enrichment outside of MySQL
> ---------------------------------------
>
>                 Key: METRON-283
>                 URL: https://issues.apache.org/jira/browse/METRON-283
>             Project: Metron
>          Issue Type: Improvement
>            Reporter: James Sirota
>            Assignee: Justin Leet
>            Priority: Minor
>
> We need to migrate our enrichment SQL store from MySQL to Phoenix or some 
> other SQL-on-HBase library.  Alternatively, we could come up with a way to do 
> this without using SQL.  This way we don't have a dependency on MySQL and 
> there is one less thing that we need to install on our platform.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to