Repository: incubator-hivemall Updated Branches: refs/heads/master c06378a81 -> bfc5b75b0
Close #90: [HIVEMALL-96-2] Added Geo Spatial UDFs Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/bfc5b75b Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/bfc5b75b Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/bfc5b75b Branch: refs/heads/master Commit: bfc5b75b0e994e47cec3c7feefa7efe2da0b540c Parents: c06378a Author: Makoto Yui <[email protected]> Authored: Mon Jun 26 20:57:46 2017 +0900 Committer: Makoto Yui <[email protected]> Committed: Mon Jun 26 20:57:46 2017 +0900 ---------------------------------------------------------------------- .../geospatial/HaversineDistanceUDF.java | 110 +++++++++++++++++++ .../java/hivemall/geospatial/Lat2TileYUDF.java | 96 ++++++++++++++++ .../java/hivemall/geospatial/Lon2TileXUDF.java | 96 ++++++++++++++++ .../java/hivemall/geospatial/MapURLUDF.java | 4 +- .../main/java/hivemall/geospatial/TileUDF.java | 16 +-- .../java/hivemall/geospatial/TileX2LonUDF.java | 96 ++++++++++++++++ .../java/hivemall/geospatial/TileY2LatUDF.java | 96 ++++++++++++++++ .../knn/similarity/DIMSUMMapperUDTF.java | 34 +++--- .../utils/geospatial/GeoSpatialUtils.java | 52 +++++++-- .../java/hivemall/ftvec/FeatureUDFTest.java | 2 +- .../geospatial/HaversineDistanceUDFTest.java | 105 ++++++++++++++++++ .../hivemall/geospatial/Lat2TileYUDFTest.java | 48 ++++++++ .../hivemall/geospatial/Lon2TileXUDFTest.java | 48 ++++++++ .../hivemall/geospatial/TileX2LonUDFTest.java | 48 ++++++++ .../hivemall/geospatial/TileY2LatUDFTest.java | 48 ++++++++ docs/gitbook/geospatial/latlon.md | 34 +++++- resources/ddl/define-all-as-permanent.hive | 15 +++ resources/ddl/define-all.hive | 15 +++ resources/ddl/define-all.spark | 15 +++ resources/ddl/define-udfs.td.hql | 5 + 20 files changed, 943 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/geospatial/HaversineDistanceUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/geospatial/HaversineDistanceUDF.java b/core/src/main/java/hivemall/geospatial/HaversineDistanceUDF.java new file mode 100644 index 0000000..b01d974 --- /dev/null +++ b/core/src/main/java/hivemall/geospatial/HaversineDistanceUDF.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.geospatial; + +import hivemall.utils.geospatial.GeoSpatialUtils; +import hivemall.utils.hadoop.HiveUtils; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; + +/** + * A UDF to return Haversine distance between given two points + * + * @link http://www.movable-type.co.uk/scripts/latlong.html + * @link https://en.wikipedia.org/wiki/Haversine_formula + * @link https://rosettacode.org/wiki/Haversine_formula + */ +@Description( + name = "haversine_distance", + value = "_FUNC_(double lat1, double lon1, double lat2, double lon2, [const boolean mile=false])::double" + + " - return distance between two locations in km [or miles] using `haversine` formula", + extended = "Usage: select latlon_distance(lat1, lon1, lat2, lon2) from ...") +@UDFType(deterministic = true, stateful = false) +public final class HaversineDistanceUDF extends GenericUDF { + + private PrimitiveObjectInspector lat1OI, lon1OI; + private PrimitiveObjectInspector lat2OI, lon2OI; + + private boolean inMiles; + private DoubleWritable result; + + @Override + public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { + if (argOIs.length != 4 && argOIs.length != 5) { + throw new UDFArgumentException("_FUNC_ takes 4 or 5 arguments: " + argOIs.length); + } + this.lat1OI = HiveUtils.asDoubleCompatibleOI(argOIs[0]); + 
this.lon1OI = HiveUtils.asDoubleCompatibleOI(argOIs[1]); + this.lat2OI = HiveUtils.asDoubleCompatibleOI(argOIs[2]); + this.lon2OI = HiveUtils.asDoubleCompatibleOI(argOIs[3]); + this.inMiles = (argOIs.length == 5) && HiveUtils.getConstBoolean(argOIs[4]); + + this.result = new DoubleWritable(); + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } + + @Override + public DoubleWritable evaluate(DeferredObject[] arguments) throws HiveException { + Object arg0 = arguments[0].get(); + Object arg1 = arguments[1].get(); + Object arg2 = arguments[2].get(); + Object arg3 = arguments[3].get(); + + if (arg0 == null || arg1 == null || arg2 == null || arg3 == null) { + return null; + } + double lat1 = PrimitiveObjectInspectorUtils.getDouble(arg0, lat1OI); + double lon1 = PrimitiveObjectInspectorUtils.getDouble(arg1, lon1OI); + double lat2 = PrimitiveObjectInspectorUtils.getDouble(arg2, lat2OI); + double lon2 = PrimitiveObjectInspectorUtils.getDouble(arg3, lon2OI); + + final double distance; + try { + distance = GeoSpatialUtils.haversineDistance(lat1, lon1, lat2, lon2); + } catch (IllegalArgumentException ex) { + throw new UDFArgumentException(ex); + } + + if (inMiles) { + double miles = distance / 1.609344d; + result.set(miles); + } else { + result.set(distance); + } + + return result; + } + + @Override + public String getDisplayString(String[] children) { + return "haversine_distance(" + Arrays.toString(children) + ")"; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/geospatial/Lat2TileYUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/geospatial/Lat2TileYUDF.java b/core/src/main/java/hivemall/geospatial/Lat2TileYUDF.java new file mode 100644 index 0000000..3a16293 --- /dev/null +++ b/core/src/main/java/hivemall/geospatial/Lat2TileYUDF.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * 
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.geospatial; + +import hivemall.utils.geospatial.GeoSpatialUtils; +import hivemall.utils.hadoop.HiveUtils; +import hivemall.utils.lang.Preconditions; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.io.IntWritable; + +/** + * @link http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames + */ +@Description( + name = "lat2tiley", + value = "_FUNC_(double lat, int zoom)::int - Returns the tile number of the given latitude and zoom level") +@UDFType(deterministic = true, stateful = false) +public final class Lat2TileYUDF extends GenericUDF { + + private PrimitiveObjectInspector latOI; + private 
PrimitiveObjectInspector zoomOI; + + private IntWritable result; + + @Override + public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { + if (argOIs.length != 2) { + throw new UDFArgumentException("_FUNC_ takes exactly 2 arguments: " + argOIs.length); + } + this.latOI = HiveUtils.asDoubleCompatibleOI(argOIs[0]); + this.zoomOI = HiveUtils.asIntegerOI(argOIs[1]); + + this.result = new IntWritable(); + return PrimitiveObjectInspectorFactory.writableIntObjectInspector; + } + + @Override + public IntWritable evaluate(DeferredObject[] arguments) throws HiveException { + Object arg0 = arguments[0].get(); + Object arg1 = arguments[1].get(); + + if (arg0 == null) { + return null; + } + if (arg1 == null) { + throw new UDFArgumentException("zoom level should not be null"); + } + + double lat = PrimitiveObjectInspectorUtils.getDouble(arg0, latOI); + int zoom = PrimitiveObjectInspectorUtils.getInt(arg1, zoomOI); + Preconditions.checkArgument(zoom >= 0, "Invalid zoom level", UDFArgumentException.class); + + final int y; + try { + y = GeoSpatialUtils.lat2tiley(lat, zoom); + } catch (IllegalArgumentException ex) { + throw new UDFArgumentException(ex); + } + + result.set(y); + return result; + } + + @Override + public String getDisplayString(String[] children) { + return "lat2tiley(" + Arrays.toString(children) + ")"; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/geospatial/Lon2TileXUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/geospatial/Lon2TileXUDF.java b/core/src/main/java/hivemall/geospatial/Lon2TileXUDF.java new file mode 100644 index 0000000..36103ec --- /dev/null +++ b/core/src/main/java/hivemall/geospatial/Lon2TileXUDF.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.geospatial; + +import hivemall.utils.geospatial.GeoSpatialUtils; +import hivemall.utils.hadoop.HiveUtils; +import hivemall.utils.lang.Preconditions; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.io.IntWritable; + +/** + * @link http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames + */ +@Description( + name = "lon2tilex", + value = "_FUNC_(double lon, int zoom)::int - Returns the tile number of the given longitude and zoom level") +@UDFType(deterministic = true, stateful = false) +public final class Lon2TileXUDF extends GenericUDF { + + private PrimitiveObjectInspector lonOI; + private PrimitiveObjectInspector zoomOI; + + private 
IntWritable result; + + @Override + public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { + if (argOIs.length != 2) { + throw new UDFArgumentException("_FUNC_ takes exactly 2 arguments: " + argOIs.length); + } + this.lonOI = HiveUtils.asDoubleCompatibleOI(argOIs[0]); + this.zoomOI = HiveUtils.asIntegerOI(argOIs[1]); + + this.result = new IntWritable(); + return PrimitiveObjectInspectorFactory.writableIntObjectInspector; + } + + @Override + public IntWritable evaluate(DeferredObject[] arguments) throws HiveException { + Object arg0 = arguments[0].get(); + Object arg1 = arguments[1].get(); + + if (arg0 == null) { + return null; + } + if (arg1 == null) { + throw new UDFArgumentException("zoom level should not be null"); + } + + double lon = PrimitiveObjectInspectorUtils.getDouble(arg0, lonOI); + int zoom = PrimitiveObjectInspectorUtils.getInt(arg1, zoomOI); + Preconditions.checkArgument(zoom >= 0, "Invalid zoom level", UDFArgumentException.class); + + final int x; + try { + x = GeoSpatialUtils.lon2tilex(lon, zoom); + } catch (IllegalArgumentException ex) { + throw new UDFArgumentException(ex); + } + + result.set(x); + return result; + } + + @Override + public String getDisplayString(String[] children) { + return "lon2tilex(" + Arrays.toString(children) + ")"; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/geospatial/MapURLUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/geospatial/MapURLUDF.java b/core/src/main/java/hivemall/geospatial/MapURLUDF.java index c2423e8..ed06604 100644 --- a/core/src/main/java/hivemall/geospatial/MapURLUDF.java +++ b/core/src/main/java/hivemall/geospatial/MapURLUDF.java @@ -133,8 +133,8 @@ public final class MapURLUDF extends UDFWithOptions { } final int xtile, ytile; try { - xtile = GeoSpatialUtils.lon2tile(lon, zoom); - ytile = GeoSpatialUtils.lat2tile(lat, zoom); + xtile 
= GeoSpatialUtils.lon2tilex(lon, zoom); + ytile = GeoSpatialUtils.lat2tiley(lat, zoom); } catch (IllegalArgumentException ex) { throw new UDFArgumentException(ex); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/geospatial/TileUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/geospatial/TileUDF.java b/core/src/main/java/hivemall/geospatial/TileUDF.java index 074b6a4..164aa56 100644 --- a/core/src/main/java/hivemall/geospatial/TileUDF.java +++ b/core/src/main/java/hivemall/geospatial/TileUDF.java @@ -33,15 +33,15 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; /** * @link http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames */ @Description( name = "tile", - value = "_FUNC_(double lat, double lon, int zoom)::INT - Returns a tile number 2^2n where n is zoom level.\n" - + "_FUNC_(lat,lon,zoom) = xtile(lon,zoom) + ytile(lat,zoom) * 2^n", + value = "_FUNC_(double lat, double lon, int zoom)::bigint - Returns a tile number 2^2n where n is zoom level.\n" + + "_FUNC_(lat,lon,zoom) = xtile(lon,zoom) + ytile(lat,zoom) * 2^zoom", extended = "refer http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames for detail") @UDFType(deterministic = true, stateful = false) public final class TileUDF extends GenericUDF { @@ -50,7 +50,7 @@ public final class TileUDF extends GenericUDF { private PrimitiveObjectInspector lonOI; private PrimitiveObjectInspector zoomOI; - private IntWritable result; + private LongWritable result; @Override public ObjectInspector initialize(ObjectInspector[] 
argOIs) throws UDFArgumentException { @@ -61,12 +61,12 @@ public final class TileUDF extends GenericUDF { this.lonOI = HiveUtils.asDoubleCompatibleOI(argOIs[1]); this.zoomOI = HiveUtils.asIntegerOI(argOIs[2]); - this.result = new IntWritable(); - return PrimitiveObjectInspectorFactory.writableIntObjectInspector; + this.result = new LongWritable(); + return PrimitiveObjectInspectorFactory.writableLongObjectInspector; } @Override - public IntWritable evaluate(DeferredObject[] arguments) throws HiveException { + public LongWritable evaluate(DeferredObject[] arguments) throws HiveException { Object arg0 = arguments[0].get(); Object arg1 = arguments[1].get(); Object arg2 = arguments[2].get(); @@ -83,7 +83,7 @@ public final class TileUDF extends GenericUDF { int zoom = PrimitiveObjectInspectorUtils.getInt(arg2, zoomOI); Preconditions.checkArgument(zoom >= 0, "Invalid zoom level", UDFArgumentException.class); - final int tile; + final long tile; try { tile = GeoSpatialUtils.tile(lat, lon, zoom); } catch (IllegalArgumentException ex) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/geospatial/TileX2LonUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/geospatial/TileX2LonUDF.java b/core/src/main/java/hivemall/geospatial/TileX2LonUDF.java new file mode 100644 index 0000000..5979227 --- /dev/null +++ b/core/src/main/java/hivemall/geospatial/TileX2LonUDF.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.geospatial; + +import hivemall.utils.geospatial.GeoSpatialUtils; +import hivemall.utils.hadoop.HiveUtils; +import hivemall.utils.lang.Preconditions; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; + +/** + * @link http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames + */ +@Description( + name = "tilex2lon", + value = "_FUNC_(int x, int zoom)::double - Returns longitude of the given tile x and zoom level") +@UDFType(deterministic = true, stateful = false) +public final class TileX2LonUDF extends GenericUDF { + + private PrimitiveObjectInspector xOI; + private PrimitiveObjectInspector zoomOI; + + private DoubleWritable result; + + @Override + public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { + if (argOIs.length != 2) { + throw new UDFArgumentException("_FUNC_ takes exactly 2 arguments: " + argOIs.length); + } + this.xOI = 
HiveUtils.asIntegerOI(argOIs[0]); + this.zoomOI = HiveUtils.asIntegerOI(argOIs[1]); + + this.result = new DoubleWritable(); + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } + + @Override + public DoubleWritable evaluate(DeferredObject[] arguments) throws HiveException { + Object arg0 = arguments[0].get(); + Object arg1 = arguments[1].get(); + + if (arg0 == null) { + return null; + } + if (arg1 == null) { + throw new UDFArgumentException("zoom level should not be null"); + } + + int x = PrimitiveObjectInspectorUtils.getInt(arg0, xOI); + int zoom = PrimitiveObjectInspectorUtils.getInt(arg1, zoomOI); + Preconditions.checkArgument(zoom >= 0, "Invalid zoom level", UDFArgumentException.class); + + final double lon; + try { + lon = GeoSpatialUtils.tilex2lon(x, zoom); + } catch (IllegalArgumentException ex) { + throw new UDFArgumentException(ex); + } + + result.set(lon); + return result; + } + + @Override + public String getDisplayString(String[] children) { + return "tilex2lon(" + Arrays.toString(children) + ")"; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/geospatial/TileY2LatUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/geospatial/TileY2LatUDF.java b/core/src/main/java/hivemall/geospatial/TileY2LatUDF.java new file mode 100644 index 0000000..f2e6da0 --- /dev/null +++ b/core/src/main/java/hivemall/geospatial/TileY2LatUDF.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.geospatial; + +import hivemall.utils.geospatial.GeoSpatialUtils; +import hivemall.utils.hadoop.HiveUtils; +import hivemall.utils.lang.Preconditions; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; + +/** + * @link http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames + */ +@Description( + name = "tiley2lat", + value = "_FUNC_(int y, int zoom)::double - Returns latitude of the given tile y and zoom level") +@UDFType(deterministic = true, stateful = false) +public final class TileY2LatUDF extends GenericUDF { + + private PrimitiveObjectInspector yOI; + private PrimitiveObjectInspector zoomOI; + + private DoubleWritable result; + + @Override + public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { + if (argOIs.length != 2) { + throw new UDFArgumentException("_FUNC_ takes exactly 2 arguments: " + argOIs.length); + } + this.yOI = 
HiveUtils.asIntegerOI(argOIs[0]); + this.zoomOI = HiveUtils.asIntegerOI(argOIs[1]); + + this.result = new DoubleWritable(); + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } + + @Override + public DoubleWritable evaluate(DeferredObject[] arguments) throws HiveException { + Object arg0 = arguments[0].get(); + Object arg1 = arguments[1].get(); + + if (arg0 == null) { + return null; + } + if (arg1 == null) { + throw new UDFArgumentException("zoom level should not be null"); + } + + int y = PrimitiveObjectInspectorUtils.getInt(arg0, yOI); + int zoom = PrimitiveObjectInspectorUtils.getInt(arg1, zoomOI); + Preconditions.checkArgument(zoom >= 0, "Invalid zoom level", UDFArgumentException.class); + + final double lat; + try { + lat = GeoSpatialUtils.tiley2lat(y, zoom); + } catch (IllegalArgumentException ex) { + throw new UDFArgumentException(ex); + } + + result.set(lat); + return result; + } + + @Override + public String getDisplayString(String[] children) { + return "tiley2lat(" + Arrays.toString(children) + ")"; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/knn/similarity/DIMSUMMapperUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/knn/similarity/DIMSUMMapperUDTF.java b/core/src/main/java/hivemall/knn/similarity/DIMSUMMapperUDTF.java index 740b2da..58c0e9a 100644 --- a/core/src/main/java/hivemall/knn/similarity/DIMSUMMapperUDTF.java +++ b/core/src/main/java/hivemall/knn/similarity/DIMSUMMapperUDTF.java @@ -27,25 +27,29 @@ import hivemall.math.random.RandomNumberGeneratorFactory; import hivemall.utils.hadoop.HiveUtils; import hivemall.utils.lang.Primitives; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Options; 
import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - @Description( name = "dimsum_mapper", value = "_FUNC_(array<string> row, map<int col_id, double norm> colNorms [, const string options]) " @@ -76,7 +80,7 @@ public final class DIMSUMMapperUDTF extends UDTFWithOptions { "Theoretically, similarities above this threshold are estimated [default: 0.5]"); opts.addOption("g", "gamma", true, "Oversampling parameter; if `gamma` is given, `threshold` will be ignored" - + " [default: 10 * log(numCols) / threshold]"); + + " [default: 10 * log(numCols) / threshold]"); opts.addOption("disable_symmetric", "disable_symmetric_output", false, "Output only contains (col j, col k) pair; symmetric (col k, col j) pair is omitted"); opts.addOption("int_feature", "feature_as_integer", false, @@ -119,11 +123,9 @@ public final class DIMSUMMapperUDTF extends UDTFWithOptions { @Override public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 2 && 
argOIs.length != 3) { - throw new UDFArgumentException( - getClass().getSimpleName() - + " takes 2 or 3 arguments: array<string> x, map<long, double> colNorms " - + "[, CONSTANT STRING options]: " - + Arrays.toString(argOIs)); + throw new UDFArgumentException(getClass().getSimpleName() + + " takes 2 or 3 arguments: array<string> x, map<long, double> colNorms " + + "[, CONSTANT STRING options]: " + Arrays.toString(argOIs)); } this.rowOI = HiveUtils.asListOI(argOIs[0]); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/main/java/hivemall/utils/geospatial/GeoSpatialUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/geospatial/GeoSpatialUtils.java b/core/src/main/java/hivemall/utils/geospatial/GeoSpatialUtils.java index 1cc254e..fdccc95 100644 --- a/core/src/main/java/hivemall/utils/geospatial/GeoSpatialUtils.java +++ b/core/src/main/java/hivemall/utils/geospatial/GeoSpatialUtils.java @@ -20,9 +20,18 @@ package hivemall.utils.geospatial; import static hivemall.utils.math.MathUtils.sec; import static java.lang.Math.PI; +import static java.lang.Math.atan; +import static java.lang.Math.atan2; +import static java.lang.Math.cos; import static java.lang.Math.floor; import static java.lang.Math.log; +import static java.lang.Math.pow; +import static java.lang.Math.sin; +import static java.lang.Math.sinh; +import static java.lang.Math.sqrt; import static java.lang.Math.tan; +import static java.lang.Math.toDegrees; +import static java.lang.Math.toRadians; import javax.annotation.Nonnegative; @@ -33,30 +42,39 @@ public final class GeoSpatialUtils { private GeoSpatialUtils() {} - public static int lon2tile(final double lon, @Nonnegative final int zoom) { + public static int lon2tilex(final double lon, @Nonnegative final int zoom) { if (lon < -180.d || lon > 180.d) { throw new IllegalArgumentException("Longitude must be in range [-180,+180]: " + lon); } return (int) 
floor((lon + 180.d) / 360.d * (1 << zoom)); } - public static int lat2tile(final double lat, @Nonnegative final int zoom) { + public static int lat2tiley(final double lat, @Nonnegative final int zoom) { if (lat < MIN_LATITUDE || lat > MAX_LATITUDE) { throw new IllegalArgumentException("Latitude must be in range [-85.0511,+85.0511]: " + lat + "\nSee http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames"); } - double lat_rad = Math.toRadians(lat); + double lat_rad = toRadians(lat); int n = 1 << zoom; return (int) floor((1.d - log(tan(lat_rad) + sec(lat_rad)) / PI) / 2.d * n); } + public static double tilex2lon(final int x, @Nonnegative final int zoom) { + return x / pow(2.d, zoom) * 360.d - 180.d; + } + + public static double tiley2lat(final int y, @Nonnegative final int zoom) { + double n = PI - (2.d * PI * y) / pow(2.d, zoom); + return toDegrees(atan(sinh(n))); + } + /** * @link https://en.wikipedia.org/wiki/Tiled_web_map#Tile_numbering_schemes */ - public static int tile(final double lat, final double lon, @Nonnegative final int zoom) { - int xtile = lon2tile(lon, zoom); - int ytile = lat2tile(lat, zoom); - int n = 1 << zoom; // 2^z + public static long tile(final double lat, final double lon, @Nonnegative final int zoom) { + int xtile = lon2tilex(lon, zoom); + int ytile = lat2tiley(lat, zoom); + long n = 1L << zoom; // 2^z return xtile + (n * ytile); } @@ -64,4 +82,24 @@ public final class GeoSpatialUtils { return 1 << (zoom * 2); // 2^2z } + /** + * Return a Haversine distance in Kilometers between two points. 
+ * + * @link http://www.movable-type.co.uk/scripts/latlong.html + * @link http://rosettacode.org/wiki/Haversine_formula#Java + * @return distance between two points in Kilometers + */ + public static double haversineDistance(final double lat1, final double lon1, final double lat2, + final double lon2) { + double R = 6371.0d; // Radius of the earth in Km + double dLat = toRadians(lat2 - lat1); // deg2rad below + double dLon = toRadians(lon2 - lon1); + double sinDLat = sin(dLat / 2.d); + double sinDLon = sin(dLon / 2.d); + double a = sinDLat * sinDLat + cos(toRadians(lat1)) * cos(toRadians(lat2)) * sinDLon + * sinDLon; + double c = 2.d * atan2(sqrt(a), sqrt(1.d - a)); + return R * c; // Distance in Km + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java b/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java index 0eb1289..55dd803 100644 --- a/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java +++ b/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java @@ -21,9 +21,9 @@ package hivemall.ftvec; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.Text; import org.junit.Assert; import org.junit.Before; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java ---------------------------------------------------------------------- diff --git 
a/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java b/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java new file mode 100644 index 0000000..af5316a --- /dev/null +++ b/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.geospatial; + +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +public class HaversineDistanceUDFTest { + + @Test + public void testKilometers1() throws HiveException, IOException { + HaversineDistanceUDF udf = new HaversineDistanceUDF(); + udf.initialize(new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector}); + + // Tokyo + double lat1 = 35.6833d, lon1 = 139.7667d; + // Osaka + double lat2 = 34.6603d, lon2 = 135.5232d; + + DoubleWritable result1 = udf.evaluate(new DeferredObject[] {new DeferredJavaObject(lat1), + new DeferredJavaObject(lon1), new DeferredJavaObject(lat2), + new DeferredJavaObject(lon2)}); + Assert.assertEquals(402.092d, result1.get(), 0.001d); + + udf.close(); + } + + @Test + public void testKilometers2() throws HiveException, IOException { + HaversineDistanceUDF udf = new HaversineDistanceUDF(); + udf.initialize(new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + 
PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, false)}); + + // Tokyo + double lat1 = 35.6833d, lon1 = 139.7667d; + // Osaka + double lat2 = 34.6603d, lon2 = 135.5232d; + + DoubleWritable result1 = udf.evaluate(new DeferredObject[] {new DeferredJavaObject(lat1), + new DeferredJavaObject(lon1), new DeferredJavaObject(lat2), + new DeferredJavaObject(lon2)}); + Assert.assertEquals(402.092d, result1.get(), 0.001d); + + udf.close(); + } + + @Test + public void testMiles() throws HiveException, IOException { + HaversineDistanceUDF udf = new HaversineDistanceUDF(); + udf.initialize(new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, true)}); + + // Tokyo + double lat1 = 35.6833d, lon1 = 139.7667d; + // Osaka + double lat2 = 34.6603d, lon2 = 135.5232d; + + DoubleWritable result1 = udf.evaluate(new DeferredObject[] {new DeferredJavaObject(lat1), + new DeferredJavaObject(lon1), new DeferredJavaObject(lat2), + new DeferredJavaObject(lon2), new DeferredJavaObject(true)}); + Assert.assertEquals(249.84d, result1.get(), 0.1d); + + udf.close(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java b/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java new file mode 100644 index 0000000..8fa1ada --- /dev/null +++ b/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.geospatial; + +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.IntWritable; +import org.junit.Assert; +import org.junit.Test; + +public class Lat2TileYUDFTest { + + @Test + public void testEvaluate() throws HiveException, IOException { + Lat2TileYUDF udf = new Lat2TileYUDF(); + udf.initialize(new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}); + + IntWritable result1 = udf.evaluate(new DeferredObject[] {new DeferredJavaObject(49.60055d), + new DeferredJavaObject(13)}); + Assert.assertEquals(2792, result1.get()); + + udf.close(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java ---------------------------------------------------------------------- diff --git 
a/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java b/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java new file mode 100644 index 0000000..cd82826 --- /dev/null +++ b/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.geospatial; + +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.IntWritable; +import org.junit.Assert; +import org.junit.Test; + +public class Lon2TileXUDFTest { + + @Test + public void testEvaluate() throws HiveException, IOException { + Lon2TileXUDF udf = new Lon2TileXUDF(); + udf.initialize(new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}); + + IntWritable result1 = udf.evaluate(new DeferredObject[] {new DeferredJavaObject(11.01296d), + new DeferredJavaObject(13)}); + Assert.assertEquals(4346, result1.get()); + + udf.close(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java b/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java new file mode 100644 index 0000000..ce2b0c9 --- /dev/null +++ b/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.geospatial; + +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +public class TileX2LonUDFTest { + + @Test + public void testEvaluate() throws IOException, HiveException { + TileX2LonUDF udf = new TileX2LonUDF(); + udf.initialize(new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}); + + DoubleWritable result = udf.evaluate(new DeferredObject[] {new DeferredJavaObject(3551), + new DeferredJavaObject(13)}); + Assert.assertEquals(-23.95019531d, result.get(), 0.001); + + udf.close(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java b/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java new file mode 100644 index 0000000..5f4b516 --- /dev/null +++ b/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.geospatial; + +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +public class TileY2LatUDFTest { + + @Test + public void testEvaluate() throws IOException, HiveException { + TileY2LatUDF udf = new TileY2LatUDF(); + udf.initialize(new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}); + + DoubleWritable result = udf.evaluate(new DeferredObject[] {new DeferredJavaObject(503), + new DeferredJavaObject(14)}); + Assert.assertEquals(83.99996604d, result.get(), 0.001); + + udf.close(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/docs/gitbook/geospatial/latlon.md 
---------------------------------------------------------------------- diff --git a/docs/gitbook/geospatial/latlon.md b/docs/gitbook/geospatial/latlon.md index 96ca11b..6c86bd1 100644 --- a/docs/gitbook/geospatial/latlon.md +++ b/docs/gitbook/geospatial/latlon.md @@ -61,16 +61,38 @@ WITH data as ( select null as lat, 0.02435 as lon, 17 as zoom ) select - tile(lat, lon, zoom) as tile + lat, lon, zoom, + tile(lat, lon, zoom) as tile, + (lon2tilex(lon,zoom) + lat2tiley(lat,zoom) * cast(pow(2, zoom) as bigint)) as tile2, + lon2tilex(lon, zoom) as xtile, + lat2tiley(lat, zoom) as ytile, + tiley2lat(lat2tiley(lat, zoom), zoom) as lat2, -- tiley2lat returns the latitude of the tile's north edge + tilex2lon(lon2tilex(lon, zoom), zoom) as lon2 -- tilex2lon returns the longitude of the tile's west edge from data; ``` -| tile | -|:--:| -|1417478152| -|88| -|NULL| +| lat | lon | zoom | tile | tile2 | xtile | ytile | lat2 | lon2 | +|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| +| 51.51202 | 0.02435 | 17 | 5712445448 | 5712445448 | 65544 | 43582 | 51.512161249555156 | 0.02197265625 | +| 51.51202 | 0.02435 | 4 | 88 | 88 | 8 | 5 | 55.77657301866768 | 0.0 | +| NULL | 0.02435 | 17 | NULL | NULL | 65544 | NULL | NULL | 0.02197265625 | + +# Distance function + +`haversine_distance(double lat1, double lon1, double lat2, double lon2, [const boolean mile=false])` returns the [Haversine distance](http://www.movable-type.co.uk/scripts/latlong.html) between two given geographic locations. 
+ +```sql +-- Tokyo (lat: 35.6833, lon: 139.7667) +-- Osaka (lat: 34.6603, lon: 135.5232) +select + haversine_distance(35.6833, 139.7667, 34.6603, 135.5232) as km, + haversine_distance(35.6833, 139.7667, 34.6603, 135.5232, true) as mile; +``` + +| km | mile | +|:-:|:-:| +| 402.09212137829684 | 249.8484608500711 | # Map URL function http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/resources/ddl/define-all-as-permanent.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all-as-permanent.hive b/resources/ddl/define-all-as-permanent.hive index 0485890..075e733 100644 --- a/resources/ddl/define-all-as-permanent.hive +++ b/resources/ddl/define-all-as-permanent.hive @@ -658,6 +658,21 @@ CREATE FUNCTION tile as 'hivemall.geospatial.TileUDF' USING JAR '${hivemall_jar} DROP FUNCTION IF EXISTS map_url; CREATE FUNCTION map_url as 'hivemall.geospatial.MapURLUDF' USING JAR '${hivemall_jar}'; +DROP FUNCTION IF EXISTS lat2tiley; +CREATE FUNCTION lat2tiley as 'hivemall.geospatial.Lat2TileYUDF' USING JAR '${hivemall_jar}'; + +DROP FUNCTION IF EXISTS lon2tilex; +CREATE FUNCTION lon2tilex as 'hivemall.geospatial.Lon2TileXUDF' USING JAR '${hivemall_jar}'; + +DROP FUNCTION IF EXISTS tilex2lon; +CREATE FUNCTION tilex2lon as 'hivemall.geospatial.TileX2LonUDF' USING JAR '${hivemall_jar}'; + +DROP FUNCTION IF EXISTS tiley2lat; +CREATE FUNCTION tiley2lat as 'hivemall.geospatial.TileY2LatUDF' USING JAR '${hivemall_jar}'; + +DROP FUNCTION IF EXISTS haversine_distance; +CREATE FUNCTION haversine_distance as 'hivemall.geospatial.HaversineDistanceUDF' USING JAR '${hivemall_jar}'; + ---------------------------- -- Smile related features -- ---------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/resources/ddl/define-all.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive index 
bd79a6b..7f5c727 100644 --- a/resources/ddl/define-all.hive +++ b/resources/ddl/define-all.hive @@ -650,6 +650,21 @@ create temporary function tile as 'hivemall.geospatial.TileUDF'; drop temporary function if exists map_url; create temporary function map_url as 'hivemall.geospatial.MapURLUDF'; +drop temporary function if exists lat2tiley; +create temporary function lat2tiley as 'hivemall.geospatial.Lat2TileYUDF'; + +drop temporary function if exists lon2tilex; +create temporary function lon2tilex as 'hivemall.geospatial.Lon2TileXUDF'; + +drop temporary function if exists tilex2lon; +create temporary function tilex2lon as 'hivemall.geospatial.TileX2LonUDF'; + +drop temporary function if exists tiley2lat; +create temporary function tiley2lat as 'hivemall.geospatial.TileY2LatUDF'; + +drop temporary function if exists haversine_distance; +create temporary function haversine_distance as 'hivemall.geospatial.HaversineDistanceUDF'; + ---------------------------- -- Smile related features -- ---------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/resources/ddl/define-all.spark ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark index 261fb8d..fc4a60e 100644 --- a/resources/ddl/define-all.spark +++ b/resources/ddl/define-all.spark @@ -634,6 +634,21 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION tile AS 'hivemall.geospatial.TileUDF'" sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS map_url") sqlContext.sql("CREATE TEMPORARY FUNCTION map_url AS 'hivemall.geospatial.MapURLUDF'") +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS lat2tiley") +sqlContext.sql("CREATE TEMPORARY FUNCTION lat2tiley AS 'hivemall.geospatial.Lat2TileYUDF'") + +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS lon2tilex") +sqlContext.sql("CREATE TEMPORARY FUNCTION lon2tilex AS 'hivemall.geospatial.Lon2TileXUDF'") + +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
tilex2lon") +sqlContext.sql("CREATE TEMPORARY FUNCTION tilex2lon AS 'hivemall.geospatial.TileX2LonUDF'") + +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS tiley2lat") +sqlContext.sql("CREATE TEMPORARY FUNCTION tiley2lat AS 'hivemall.geospatial.TileY2LatUDF'") + +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS haversine_distance") +sqlContext.sql("CREATE TEMPORARY FUNCTION haversine_distance AS 'hivemall.geospatial.HaversineDistanceUDF'") + /** * Smile related features */ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bfc5b75b/resources/ddl/define-udfs.td.hql ---------------------------------------------------------------------- diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql index 7fba0d6..1d11d1a 100644 --- a/resources/ddl/define-udfs.td.hql +++ b/resources/ddl/define-udfs.td.hql @@ -164,6 +164,11 @@ create temporary function train_plsa as 'hivemall.topicmodel.PLSAUDTF'; create temporary function plsa_predict as 'hivemall.topicmodel.PLSAPredictUDAF'; create temporary function tile as 'hivemall.geospatial.TileUDF'; create temporary function map_url as 'hivemall.geospatial.MapURLUDF'; +create temporary function lat2tiley as 'hivemall.geospatial.Lat2TileYUDF'; +create temporary function lon2tilex as 'hivemall.geospatial.Lon2TileXUDF'; +create temporary function tilex2lon as 'hivemall.geospatial.TileX2LonUDF'; +create temporary function tiley2lat as 'hivemall.geospatial.TileY2LatUDF'; +create temporary function haversine_distance as 'hivemall.geospatial.HaversineDistanceUDF'; create temporary function l2_norm as 'hivemall.tools.math.L2NormUDAF'; create temporary function dimsum_mapper as 'hivemall.knn.similarity.DIMSUMMapperUDTF'; create temporary function train_classifier as 'hivemall.classifier.GeneralClassifierUDTF';
