This is an automated email from the ASF dual-hosted git repository.
fgreg pushed a commit to branch 1.1.0-SNAPSHOT
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
The following commit(s) were added to refs/heads/1.1.0-SNAPSHOT by this push:
new c9de8ec SDAP-196 take time into account when finding closest match (#71)
c9de8ec is described below
commit c9de8ec9a9597f23f743efe1091d69943e800460
Author: fgreg <[email protected]>
AuthorDate: Sun Jul 14 22:19:36 2019 -0700
SDAP-196 take time into account when finding closest match (#71)
---
.../.idea/inspectionProfiles/Project_Default.xml | 53 ++++++++++++++++++++++
analysis/tests/algorithms_spark/Matchup_test.py | 18 ++++----
analysis/webservice/algorithms_spark/Matchup.py | 14 ++++--
3 files changed, 73 insertions(+), 12 deletions(-)
diff --git a/analysis/.idea/inspectionProfiles/Project_Default.xml
b/analysis/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..b4031a3
--- /dev/null
+++ b/analysis/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,53 @@
+<component name="InspectionProjectProfileManager">
+ <profile version="1.0">
+ <option name="myName" value="Project Default" />
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true"
level="WARNING" enabled_by_default="true">
+ <option name="ignoredPackages">
+ <value>
+ <list size="40">
+ <item index="0" class="java.lang.String" itemvalue="pbr" />
+ <item index="1" class="java.lang.String" itemvalue="protobuf" />
+ <item index="2" class="java.lang.String" itemvalue="traits" />
+ <item index="3" class="java.lang.String" itemvalue="basemap" />
+ <item index="4" class="java.lang.String"
itemvalue="python-dateutil" />
+ <item index="5" class="java.lang.String" itemvalue="awscli" />
+ <item index="6" class="java.lang.String" itemvalue="MarkupSafe" />
+ <item index="7" class="java.lang.String" itemvalue="numpy" />
+ <item index="8" class="java.lang.String" itemvalue="pyasn1" />
+ <item index="9" class="java.lang.String" itemvalue="requests" />
+ <item index="10" class="java.lang.String" itemvalue="mpld3" />
+ <item index="11" class="java.lang.String" itemvalue="Jinja2" />
+ <item index="12" class="java.lang.String" itemvalue="pyface" />
+ <item index="13" class="java.lang.String"
itemvalue="backports.ssl-match-hostname" />
+ <item index="14" class="java.lang.String" itemvalue="Pygments" />
+ <item index="15" class="java.lang.String" itemvalue="certifi" />
+ <item index="16" class="java.lang.String" itemvalue="descartes" />
+ <item index="17" class="java.lang.String" itemvalue="pyparsing" />
+ <item index="18" class="java.lang.String" itemvalue="Cython" />
+ <item index="19" class="java.lang.String" itemvalue="scipy" />
+ <item index="20" class="java.lang.String"
itemvalue="cassandra-driver" />
+ <item index="21" class="java.lang.String" itemvalue="six" />
+ <item index="22" class="java.lang.String" itemvalue="tornado" />
+ <item index="23" class="java.lang.String" itemvalue="botocore" />
+ <item index="24" class="java.lang.String" itemvalue="netCDF4" />
+ <item index="25" class="java.lang.String" itemvalue="solrpy" />
+ <item index="26" class="java.lang.String" itemvalue="GDAL" />
+ <item index="27" class="java.lang.String" itemvalue="traitsui" />
+ <item index="28" class="java.lang.String"
itemvalue="nexus-data-access" />
+ <item index="29" class="java.lang.String" itemvalue="utm" />
+ <item index="30" class="java.lang.String"
itemvalue="backports-abc" />
+ <item index="31" class="java.lang.String" itemvalue="pyshp" />
+ <item index="32" class="java.lang.String" itemvalue="boto3" />
+ <item index="33" class="java.lang.String" itemvalue="s3transfer" />
+ <item index="34" class="java.lang.String"
itemvalue="backports.functools-lru-cache" />
+ <item index="35" class="java.lang.String" itemvalue="matplotlib" />
+ <item index="36" class="java.lang.String" itemvalue="configobj" />
+ <item index="37" class="java.lang.String" itemvalue="nexusproto" />
+ <item index="38" class="java.lang.String" itemvalue="pytz" />
+ <item index="39" class="java.lang.String" itemvalue="Cartopy" />
+ </list>
+ </value>
+ </option>
+ </inspection_tool>
+ </profile>
+</component>
\ No newline at end of file
diff --git a/analysis/tests/algorithms_spark/Matchup_test.py
b/analysis/tests/algorithms_spark/Matchup_test.py
index 5dee17c..ca40a66 100644
--- a/analysis/tests/algorithms_spark/Matchup_test.py
+++ b/analysis/tests/algorithms_spark/Matchup_test.py
@@ -30,7 +30,7 @@ class TestMatch_Points(unittest.TestCase):
primary_points = [primary]
matchup_points = [matchup]
- matches = list(match_points_generator(primary_points, matchup_points,
0))
+ matches = list(match_tile_to_point_generator(primary_points,
matchup_points, 0))
self.assertEquals(1, len(matches))
@@ -46,7 +46,7 @@ class TestMatch_Points(unittest.TestCase):
primary_points = [primary]
matchup_points = [matchup]
- matches = list(match_points_generator(primary_points, matchup_points,
150000)) # tolerance 150 km
+ matches = list(match_tile_to_point_generator(primary_points,
matchup_points, 150000)) # tolerance 150 km
self.assertEquals(1, len(matches))
@@ -62,7 +62,7 @@ class TestMatch_Points(unittest.TestCase):
primary_points = [primary]
matchup_points = [matchup]
- matches = list(match_points_generator(primary_points, matchup_points,
200)) # tolerance 200 m
+ matches = list(match_tile_to_point_generator(primary_points,
matchup_points, 200)) # tolerance 200 m
self.assertEquals(1, len(matches))
@@ -78,7 +78,7 @@ class TestMatch_Points(unittest.TestCase):
primary_points = [primary]
matchup_points = [matchup]
- matches = list(match_points_generator(primary_points, matchup_points,
150000)) # tolerance 150 km
+ matches = list(match_tile_to_point_generator(primary_points,
matchup_points, 150000)) # tolerance 150 km
self.assertEquals(0, len(matches))
@@ -89,7 +89,7 @@ class TestMatch_Points(unittest.TestCase):
primary_points = [primary]
matchup_points = [matchup]
- matches = list(match_points_generator(primary_points, matchup_points,
100)) # tolerance 100 m
+ matches = list(match_tile_to_point_generator(primary_points,
matchup_points, 100)) # tolerance 100 m
self.assertEquals(0, len(matches))
@@ -103,7 +103,7 @@ class TestMatch_Points(unittest.TestCase):
DomsPoint(longitude=0.5, latitude=1.5, time=1000, depth=3.0,
data_id=4)
]
- matches = list(match_points_generator(primary_points, matchup_points,
150000)) # tolerance 150 km
+ matches = list(match_tile_to_point_generator(primary_points,
matchup_points, 150000)) # tolerance 150 km
self.assertEquals(3, len(matches))
@@ -126,7 +126,7 @@ class TestMatch_Points(unittest.TestCase):
DomsPoint(longitude=0.5, latitude=1.5, time=1000, depth=3.0,
data_id=5)
]
- matches = list(match_points_generator(primary_points, matchup_points,
150000)) # tolerance 150 km
+ matches = list(match_tile_to_point_generator(primary_points,
matchup_points, 150000)) # tolerance 150 km
self.assertEquals(5, len(matches))
@@ -152,7 +152,7 @@ class TestMatch_Points(unittest.TestCase):
DomsPoint(longitude=-31.121, latitude=31.256, time=1351519892,
depth=4.07, data_id=5)
]
- matches = list(match_points_generator(primary_points, matchup_points,
110000)) # tolerance 110 km
+ matches = list(match_tile_to_point_generator(primary_points,
matchup_points, 110000)) # tolerance 110 km
self.assertEquals(1, len(matches))
@@ -187,7 +187,7 @@ class TestMatch_Points(unittest.TestCase):
log.info("Starting matchup")
log.info("Best of repeat(3, 2) matchups: %s seconds" % min(
- timeit.repeat(lambda: list(match_points_generator(primary_points,
matchup_points, 1500)), repeat=3,
+ timeit.repeat(lambda:
list(match_tile_to_point_generator(primary_points, matchup_points, 1500)),
repeat=3,
number=2)))
diff --git a/analysis/webservice/algorithms_spark/Matchup.py
b/analysis/webservice/algorithms_spark/Matchup.py
index b6bf95c..cd21f17 100644
--- a/analysis/webservice/algorithms_spark/Matchup.py
+++ b/analysis/webservice/algorithms_spark/Matchup.py
@@ -14,7 +14,6 @@
# limitations under the License.
-
import json
import logging
import threading
@@ -478,9 +477,18 @@ def spark_matchup_driver(tile_ids, bounding_wkt,
primary_ds_name, matchup_ds_nam
az12, az21, distance = wgs84_geod.inv(lon1, lat1, lon2, lat2)
return distance
+ # If points are equidistant, determine closest by time
+ def time_diff(primary, matchup):
+ primary_time = iso_time_to_epoch(primary.time)
+ matchup_time = iso_time_to_epoch(matchup.time)
+ return abs(primary_time - matchup_time)
+
rdd_filtered = rdd_filtered \
- .map(lambda (primary, matchup): tuple([primary, tuple([matchup,
dist(primary, matchup)])])) \
- .reduceByKey(lambda match_1, match_2: match_1 if match_1[1] <
match_2[1] else match_2) \
+ .map(lambda (primary, matchup):
+ tuple([primary, tuple([matchup, dist(primary, matchup),
time_diff(primary, matchup)])])) \
+ .reduceByKey(lambda match_1, match_2:
+ match_1 if match_1[1] < match_2[1] or (
+ match_1[1] == match_2[1] and match_1[2] <
match_2[2]) else match_2) \
.mapValues(lambda x: [x[0]])
else:
rdd_filtered = rdd_filtered \