Hello Ottomata,

I'd like you to do a code review.  Please visit

    https://gerrit.wikimedia.org/r/188012

to review the following change.

Change subject: Fix overriding MaxMind database location in Geocoding
......................................................................

Fix overriding MaxMind database location in Geocoding

Change-Id: Id2efa0292ca06c44a77807db0f0c76245d4a9753
---
M refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
M 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
M 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
M 
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
M 
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
5 files changed, 52 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source 
refs/changes/12/188012/1

diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
index 6dccf0d..6f7eb26 100644
--- 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
@@ -29,6 +29,8 @@
 import com.maxmind.geoip2.model.CityResponse;
 import com.maxmind.geoip2.model.CountryResponse;
 import com.maxmind.geoip2.record.*;
+
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.log4j.Logger;
 
 /**
@@ -56,6 +58,10 @@
     static final Logger LOG = Logger.getLogger(Geocode.class.getName());
 
     public Geocode() throws IOException {
+        this(null);
+    }
+
+    public Geocode(JobConf jobConf) throws IOException {
         // Default paths to Maxmind database
         String defaultCountryDatabasePath = 
"/usr/share/GeoIP/GeoIP2-Country.mmdb";
         String defaultCityDatabasePath = "/usr/share/GeoIP/GeoIP2-City.mmdb";
@@ -63,6 +69,14 @@
         String countryDatabasePath = 
System.getProperty("maxmind.database.country", defaultCountryDatabasePath);
         String cityDatabasePath = System.getProperty("maxmind.database.city", 
defaultCityDatabasePath);
 
+        // Override database paths with Hadoop properties, if they exist
+        if (jobConf != null) {
+            countryDatabasePath = 
jobConf.getTrimmed("maxmind.database.country",
+                    countryDatabasePath);
+            cityDatabasePath = jobConf.getTrimmed("maxmind.database.city",
+                    cityDatabasePath);
+        }
+
         LOG.info("Country Database: " + countryDatabasePath);
         LOG.info("City database: " + cityDatabasePath);
 
diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
index 9c8acd5..a65e142 100644
--- 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
@@ -20,6 +20,7 @@
 
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -44,7 +45,9 @@
  *   CREATE TEMPORARY FUNCTION geocode_country as 
'org.wikimedia.analytics.refinery.hive.GeocodedCountryUDF';
  *   SELECT geocode_country(ip) from webrequest where year = 2014 limit 10;
  * The above steps assume that the two required files - GeoIP2-Country.mmdb 
and GeoIP2-City.mmdb - are available
- * in their default path /usr/share/GeoIP.
+ * in their default path /usr/share/GeoIP. If not, then add the following 
steps:
+ *   SET maxmind.database.country=/path/to/GeoIP2-Country.mmdb;
+ *   SET maxmind.database.city=/path/to/GeoIP2-City.mmdb;
  */
 @UDFType(deterministic = true)
 @Description(
@@ -85,21 +88,28 @@
         //Cache the argument to be used in evaluate
         argumentOI = arg1;
 
+        return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    }
+
+    @Override
+    public void configure(MapredContext context) {
         if (geocode == null) {
             try {
-                geocode = new Geocode();
+                geocode = new Geocode(context.getJobConf());
             } catch (IOException ex) {
                 LOG.error(ex);
                 throw new RuntimeException(ex);
             }
         }
 
-        return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+        super.configure(context);
     }
 
     @SuppressWarnings("unchecked")
     @Override
     public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert geocode != null : "Evaluate called without initializing 
'geocode'";
+
         result.clear();
 
         if (arguments.length == 1 && argumentOI != null && arguments[0] != 
null) {
diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
index caccd04..20ea66b 100644
--- 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
@@ -17,6 +17,7 @@
 package org.wikimedia.analytics.refinery.hive;
 
 import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -43,7 +44,9 @@
  *   CREATE TEMPORARY FUNCTION geocode_data as 
'org.wikimedia.analytics.refinery.hive.GeocodedDataUDF';
  *   SELECT geocode_data(ip)['country'], geocode_data(ip)['city'] from 
webrequest where year = 2014 limit 10;
  * The above steps assume that the two required files - GeoIP2-Country.mmdb 
and GeoIP2-City.mmdb - are available
- * in their default path /usr/share/GeoIP.
+ * in their default path /usr/share/GeoIP. If not, then add the following 
steps:
+ *   SET maxmind.database.country=/path/to/GeoIP2-Country.mmdb;
+ *   SET maxmind.database.city=/path/to/GeoIP2-City.mmdb;
  */
 @UDFType(deterministic = true)
 @Description(name = "geocoded_data", value = "_FUNC_(ip) - "
@@ -96,20 +99,25 @@
 
         argumentOI = arg1;
 
+        result = new HashMap<String, String>();
+
+        return ObjectInspectorFactory.getStandardMapObjectInspector(
+                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+    }
+
+    @Override
+    public void configure(MapredContext context) {
         if (geocode == null) {
             try {
-                geocode = new Geocode();
+                geocode = new Geocode(context.getJobConf());
             } catch (IOException ex) {
                 LOG.error(ex);
                 throw new RuntimeException(ex);
             }
         }
 
-        result = new HashMap<String, String>();
-
-        return ObjectInspectorFactory.getStandardMapObjectInspector(
-                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
-                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+        super.configure(context);
     }
 
     /**
@@ -132,6 +140,8 @@
     @SuppressWarnings("unchecked")
     @Override
     public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert geocode != null : "Evaluate called without initializing 
'geocode'";
+
         result.clear();
 
         if (arguments.length == 1 && argumentOI != null && arguments[0] != 
null) {
diff --git 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
index d9d6ff3..1c39d4b 100644
--- 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
+++ 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
@@ -15,6 +15,7 @@
  */
 package org.wikimedia.analytics.refinery.hive;
 
+import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
@@ -23,6 +24,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
 import org.junit.Test;
 import static org.junit.Assert.*;
 
@@ -54,6 +56,7 @@
         GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
 
         geocodedCountryUDF.initialize(initArguments);
+        geocodedCountryUDF.configure(MapredContext.init(false, new JobConf()));
 
         //IPv4 addresses taken from Maxmind's test suite
         String ip = "81.2.69.160";
@@ -69,6 +72,7 @@
         GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
 
         geocodedCountryUDF.initialize(initArguments);
+        geocodedCountryUDF.configure(MapredContext.init(false, new JobConf()));
 
         //IPv6 representation of an IPv4 address taken from Maxmind's test 
suite
         String ip = "::ffff:81.2.69.160";
@@ -84,6 +88,7 @@
         GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
 
         geocodedCountryUDF.initialize(initArguments);
+        geocodedCountryUDF.configure(MapredContext.init(false, new JobConf()));
 
         //Invalid IPv4 addresses
         String ip = "-";
diff --git 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
index c5b8070..2cb242a 100644
--- 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
+++ 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
@@ -15,6 +15,7 @@
  */
 package org.wikimedia.analytics.refinery.hive;
 
+import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
@@ -22,6 +23,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.mapred.JobConf;
 import org.junit.Test;
 import static org.junit.Assert.*;
 
@@ -116,6 +118,7 @@
         GeocodedDataUDF geocodedDataUDF = new GeocodedDataUDF();
 
         geocodedDataUDF.initialize(initArguments);
+        geocodedDataUDF.configure(MapredContext.init(false, new JobConf()));
 
         DeferredObject[] args = new DeferredObject[] { new 
DeferredJavaObject(ip) };
         return (Map<String, String>)geocodedDataUDF.evaluate(args);

-- 
To view, visit https://gerrit.wikimedia.org/r/188012
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id2efa0292ca06c44a77807db0f0c76245d4a9753
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to