Hello Ottomata,
I'd like you to do a code review. Please visit
https://gerrit.wikimedia.org/r/188012
to review the following change.
Change subject: Fix overriding MaxMind database location in Geocoding
......................................................................
Fix overriding MaxMind database location in Geocoding
Change-Id: Id2efa0292ca06c44a77807db0f0c76245d4a9753
---
M refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
M
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
M
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
M
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
M
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
5 files changed, 52 insertions(+), 10 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source
refs/changes/12/188012/1
diff --git
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
index 6dccf0d..6f7eb26 100644
---
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
+++
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
@@ -29,6 +29,8 @@
import com.maxmind.geoip2.model.CityResponse;
import com.maxmind.geoip2.model.CountryResponse;
import com.maxmind.geoip2.record.*;
+
+import org.apache.hadoop.mapred.JobConf;
import org.apache.log4j.Logger;
/**
@@ -56,6 +58,10 @@
static final Logger LOG = Logger.getLogger(Geocode.class.getName());
public Geocode() throws IOException {
+ this(null);
+ }
+
+ public Geocode(JobConf jobConf) throws IOException {
// Default paths to Maxmind database
String defaultCountryDatabasePath =
"/usr/share/GeoIP/GeoIP2-Country.mmdb";
String defaultCityDatabasePath = "/usr/share/GeoIP/GeoIP2-City.mmdb";
@@ -63,6 +69,14 @@
String countryDatabasePath =
System.getProperty("maxmind.database.country", defaultCountryDatabasePath);
String cityDatabasePath = System.getProperty("maxmind.database.city",
defaultCityDatabasePath);
+ // Override database paths with Hadoop properties, if they exist
+ if (jobConf != null) {
+ countryDatabasePath =
jobConf.getTrimmed("maxmind.database.country",
+ countryDatabasePath);
+ cityDatabasePath = jobConf.getTrimmed("maxmind.database.city",
+ cityDatabasePath);
+ }
+
LOG.info("Country Database: " + countryDatabasePath);
LOG.info("City database: " + cityDatabasePath);
diff --git
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
index 9c8acd5..a65e142 100644
---
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
+++
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
@@ -20,6 +20,7 @@
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -44,7 +45,9 @@
* CREATE TEMPORARY FUNCTION geocode_country as
'org.wikimedia.analytics.refinery.hive.GeocodedCountryUDF';
* SELECT geocode_country(ip) from webrequest where year = 2014 limit 10;
* The above steps assume that the two required files - GeoIP2-Country.mmdb
and GeoIP2-City.mmdb - are available
- * in their default path /usr/share/GeoIP.
+ * in their default path /usr/share/GeoIP. If not, then add the following
steps:
+ * SET maxmind.database.country=/path/to/GeoIP2-Country.mmdb;
+ * SET maxmind.database.city=/path/to/GeoIP2-City.mmdb;
*/
@UDFType(deterministic = true)
@Description(
@@ -85,21 +88,28 @@
//Cache the argument to be used in evaluate
argumentOI = arg1;
+ return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ }
+
+ @Override
+ public void configure(MapredContext context) {
if (geocode == null) {
try {
- geocode = new Geocode();
+ geocode = new Geocode(context.getJobConf());
} catch (IOException ex) {
LOG.error(ex);
throw new RuntimeException(ex);
}
}
- return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ super.configure(context);
}
@SuppressWarnings("unchecked")
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert geocode != null : "Evaluate called without initializing
'geocode'";
+
result.clear();
if (arguments.length == 1 && argumentOI != null && arguments[0] !=
null) {
diff --git
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
index caccd04..20ea66b 100644
---
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
+++
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedDataUDF.java
@@ -17,6 +17,7 @@
package org.wikimedia.analytics.refinery.hive;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -43,7 +44,9 @@
* CREATE TEMPORARY FUNCTION geocode_data as
'org.wikimedia.analytics.refinery.hive.GeocodedDataUDF';
* SELECT geocode_data(ip)['country'], geocode_data(ip)['city'] from
webrequest where year = 2014 limit 10;
* The above steps assume that the two required files - GeoIP2-Country.mmdb
and GeoIP2-City.mmdb - are available
- * in their default path /usr/share/GeoIP.
+ * in their default path /usr/share/GeoIP. If not, then add the following
steps:
+ * SET maxmind.database.country=/path/to/GeoIP2-Country.mmdb;
+ * SET maxmind.database.city=/path/to/GeoIP2-City.mmdb;
*/
@UDFType(deterministic = true)
@Description(name = "geocoded_data", value = "_FUNC_(ip) - "
@@ -96,20 +99,25 @@
argumentOI = arg1;
+ result = new HashMap<String, String>();
+
+ return ObjectInspectorFactory.getStandardMapObjectInspector(
+ PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+ PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+ }
+
+ @Override
+ public void configure(MapredContext context) {
if (geocode == null) {
try {
- geocode = new Geocode();
+ geocode = new Geocode(context.getJobConf());
} catch (IOException ex) {
LOG.error(ex);
throw new RuntimeException(ex);
}
}
- result = new HashMap<String, String>();
-
- return ObjectInspectorFactory.getStandardMapObjectInspector(
- PrimitiveObjectInspectorFactory.javaStringObjectInspector,
- PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+ super.configure(context);
}
/**
@@ -132,6 +140,8 @@
@SuppressWarnings("unchecked")
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert geocode != null : "Evaluate called without initializing
'geocode'";
+
result.clear();
if (arguments.length == 1 && argumentOI != null && arguments[0] !=
null) {
diff --git
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
index d9d6ff3..1c39d4b 100644
---
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
+++
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
@@ -15,6 +15,7 @@
*/
package org.wikimedia.analytics.refinery.hive;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
@@ -23,6 +24,7 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
import org.junit.Test;
import static org.junit.Assert.*;
@@ -54,6 +56,7 @@
GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
geocodedCountryUDF.initialize(initArguments);
+ geocodedCountryUDF.configure(MapredContext.init(false, new JobConf()));
//IPv4 addresses taken from Maxmind's test suite
String ip = "81.2.69.160";
@@ -69,6 +72,7 @@
GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
geocodedCountryUDF.initialize(initArguments);
+ geocodedCountryUDF.configure(MapredContext.init(false, new JobConf()));
//IPv6 representation of an IPv4 address taken from Maxmind's test
suite
String ip = "::ffff:81.2.69.160";
@@ -84,6 +88,7 @@
GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
geocodedCountryUDF.initialize(initArguments);
+ geocodedCountryUDF.configure(MapredContext.init(false, new JobConf()));
//Invalid IPv4 addresses
String ip = "-";
diff --git
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
index c5b8070..2cb242a 100644
---
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
+++
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedDataUDF.java
@@ -15,6 +15,7 @@
*/
package org.wikimedia.analytics.refinery.hive;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
@@ -22,6 +23,7 @@
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.mapred.JobConf;
import org.junit.Test;
import static org.junit.Assert.*;
@@ -116,6 +118,7 @@
GeocodedDataUDF geocodedDataUDF = new GeocodedDataUDF();
geocodedDataUDF.initialize(initArguments);
+ geocodedDataUDF.configure(MapredContext.init(false, new JobConf()));
DeferredObject[] args = new DeferredObject[] { new
DeferredJavaObject(ip) };
return (Map<String, String>)geocodedDataUDF.evaluate(args);
--
To view, visit https://gerrit.wikimedia.org/r/188012
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Id2efa0292ca06c44a77807db0f0c76245d4a9753
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits