This is an automated email from the ASF dual-hosted git repository. fgreg pushed a commit to branch SDAP-196 in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit dca271f30c5f95954a9e48a7b49630971409a84a Author: Frank Greguska <[email protected]> AuthorDate: Sun Jul 14 22:14:49 2019 -0700 SDAP-196 take time into account when finding closest match --- .../.idea/inspectionProfiles/Project_Default.xml | 53 ++++++++++++++++++++++ analysis/tests/algorithms_spark/Matchup_test.py | 18 ++++---- analysis/webservice/algorithms_spark/Matchup.py | 14 ++++-- 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/analysis/.idea/inspectionProfiles/Project_Default.xml b/analysis/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..b4031a3 --- /dev/null +++ b/analysis/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,53 @@ +<component name="InspectionProjectProfileManager"> + <profile version="1.0"> + <option name="myName" value="Project Default" /> + <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true"> + <option name="ignoredPackages"> + <value> + <list size="40"> + <item index="0" class="java.lang.String" itemvalue="pbr" /> + <item index="1" class="java.lang.String" itemvalue="protobuf" /> + <item index="2" class="java.lang.String" itemvalue="traits" /> + <item index="3" class="java.lang.String" itemvalue="basemap" /> + <item index="4" class="java.lang.String" itemvalue="python-dateutil" /> + <item index="5" class="java.lang.String" itemvalue="awscli" /> + <item index="6" class="java.lang.String" itemvalue="MarkupSafe" /> + <item index="7" class="java.lang.String" itemvalue="numpy" /> + <item index="8" class="java.lang.String" itemvalue="pyasn1" /> + <item index="9" class="java.lang.String" itemvalue="requests" /> + <item index="10" class="java.lang.String" itemvalue="mpld3" /> + <item index="11" class="java.lang.String" itemvalue="Jinja2" /> + <item index="12" class="java.lang.String" itemvalue="pyface" /> + <item index="13" class="java.lang.String" itemvalue="backports.ssl-match-hostname" /> + <item index="14" class="java.lang.String" itemvalue="Pygments" /> + <item index="15" class="java.lang.String" itemvalue="certifi" /> + <item index="16" class="java.lang.String" itemvalue="descartes" /> + <item index="17" class="java.lang.String" itemvalue="pyparsing" /> + <item index="18" class="java.lang.String" itemvalue="Cython" /> + <item index="19" class="java.lang.String" itemvalue="scipy" /> + <item index="20" class="java.lang.String" itemvalue="cassandra-driver" /> + <item index="21" class="java.lang.String" itemvalue="six" /> + <item index="22" class="java.lang.String" itemvalue="tornado" /> + <item index="23" class="java.lang.String" itemvalue="botocore" /> + <item index="24" class="java.lang.String" itemvalue="netCDF4" /> + <item index="25" class="java.lang.String" itemvalue="solrpy" /> + <item index="26" class="java.lang.String" itemvalue="GDAL" /> + <item index="27" class="java.lang.String" itemvalue="traitsui" /> + <item index="28" class="java.lang.String" itemvalue="nexus-data-access" /> + <item index="29" class="java.lang.String" itemvalue="utm" /> + <item index="30" class="java.lang.String" itemvalue="backports-abc" /> + <item index="31" class="java.lang.String" itemvalue="pyshp" /> + <item index="32" class="java.lang.String" itemvalue="boto3" /> + <item index="33" class="java.lang.String" itemvalue="s3transfer" /> + <item index="34" class="java.lang.String" itemvalue="backports.functools-lru-cache" /> + <item index="35" class="java.lang.String" itemvalue="matplotlib" /> + <item index="36" class="java.lang.String" itemvalue="configobj" /> + <item index="37" class="java.lang.String" itemvalue="nexusproto" /> + <item index="38" class="java.lang.String" itemvalue="pytz" /> + <item index="39" class="java.lang.String" itemvalue="Cartopy" /> + </list> + </value> + </option> + </inspection_tool> + </profile> +</component> \ No newline at end of file diff --git a/analysis/tests/algorithms_spark/Matchup_test.py b/analysis/tests/algorithms_spark/Matchup_test.py index 5dee17c..ca40a66 100644 --- a/analysis/tests/algorithms_spark/Matchup_test.py +++ b/analysis/tests/algorithms_spark/Matchup_test.py @@ -30,7 +30,7 @@ class TestMatch_Points(unittest.TestCase): primary_points = [primary] matchup_points = [matchup] - matches = list(match_points_generator(primary_points, matchup_points, 0)) + matches = list(match_tile_to_point_generator(primary_points, matchup_points, 0)) self.assertEquals(1, len(matches)) @@ -46,7 +46,7 @@ class TestMatch_Points(unittest.TestCase): primary_points = [primary] matchup_points = [matchup] - matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km + matches = list(match_tile_to_point_generator(primary_points, matchup_points, 150000)) # tolerance 150 km self.assertEquals(1, len(matches)) @@ -62,7 +62,7 @@ class TestMatch_Points(unittest.TestCase): primary_points = [primary] matchup_points = [matchup] - matches = list(match_points_generator(primary_points, matchup_points, 200)) # tolerance 200 m + matches = list(match_tile_to_point_generator(primary_points, matchup_points, 200)) # tolerance 200 m self.assertEquals(1, len(matches)) @@ -78,7 +78,7 @@ class TestMatch_Points(unittest.TestCase): primary_points = [primary] matchup_points = [matchup] - matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km + matches = list(match_tile_to_point_generator(primary_points, matchup_points, 150000)) # tolerance 150 km self.assertEquals(0, len(matches)) @@ -89,7 +89,7 @@ class TestMatch_Points(unittest.TestCase): primary_points = [primary] matchup_points = [matchup] - matches = list(match_points_generator(primary_points, matchup_points, 100)) # tolerance 100 m + matches = list(match_tile_to_point_generator(primary_points, matchup_points, 100)) # tolerance 100 m self.assertEquals(0, len(matches)) @@ -103,7 +103,7 @@ class TestMatch_Points(unittest.TestCase): DomsPoint(longitude=0.5, latitude=1.5, time=1000, depth=3.0, data_id=4) ] - matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km + matches = list(match_tile_to_point_generator(primary_points, matchup_points, 150000)) # tolerance 150 km self.assertEquals(3, len(matches)) @@ -126,7 +126,7 @@ class TestMatch_Points(unittest.TestCase): DomsPoint(longitude=0.5, latitude=1.5, time=1000, depth=3.0, data_id=5) ] - matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km + matches = list(match_tile_to_point_generator(primary_points, matchup_points, 150000)) # tolerance 150 km self.assertEquals(5, len(matches)) @@ -152,7 +152,7 @@ class TestMatch_Points(unittest.TestCase): DomsPoint(longitude=-31.121, latitude=31.256, time=1351519892, depth=4.07, data_id=5) ] - matches = list(match_points_generator(primary_points, matchup_points, 110000)) # tolerance 110 km + matches = list(match_tile_to_point_generator(primary_points, matchup_points, 110000)) # tolerance 110 km self.assertEquals(1, len(matches)) @@ -187,7 +187,7 @@ class TestMatch_Points(unittest.TestCase): log.info("Starting matchup") log.info("Best of repeat(3, 2) matchups: %s seconds" % min( - timeit.repeat(lambda: list(match_points_generator(primary_points, matchup_points, 1500)), repeat=3, + timeit.repeat(lambda: list(match_tile_to_point_generator(primary_points, matchup_points, 1500)), repeat=3, number=2))) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index b6bf95c..cd21f17 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -14,7 +14,6 @@ # limitations under the License. - import json import logging import threading @@ -478,9 +477,18 @@ def spark_matchup_driver(tile_ids, bounding_wkt, primary_ds_name, matchup_ds_nam az12, az21, distance = wgs84_geod.inv(lon1, lat1, lon2, lat2) return distance + # If points are equidistant, determine closest by time + def time_diff(primary, matchup): + primary_time = iso_time_to_epoch(primary.time) + matchup_time = iso_time_to_epoch(matchup.time) + return abs(primary_time - matchup_time) + rdd_filtered = rdd_filtered \ - .map(lambda (primary, matchup): tuple([primary, tuple([matchup, dist(primary, matchup)])])) \ - .reduceByKey(lambda match_1, match_2: match_1 if match_1[1] < match_2[1] else match_2) \ + .map(lambda (primary, matchup): + tuple([primary, tuple([matchup, dist(primary, matchup), time_diff(primary, matchup)])])) \ + .reduceByKey(lambda match_1, match_2: + match_1 if match_1[1] < match_2[1] or ( + match_1[1] == match_2[1] and match_1[2] < match_2[2]) else match_2) \ .mapValues(lambda x: [x[0]]) else: rdd_filtered = rdd_filtered \
