This is an automated email from the ASF dual-hosted git repository. eamonford pushed a commit to branch support-deseason in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit c122c5585a5b0e8b4d0a0bf25b84464de5e9c6ce Author: Eamon Ford <[email protected]> AuthorDate: Mon Sep 14 13:50:23 2020 -0700 compute deseason --- .../webservice/algorithms_spark/TimeSeriesSpark.py | 30 ++++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py index d56b46b..079143b 100644 --- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py +++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py @@ -204,27 +204,35 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler): if apply_seasonal_cycle_filter: the_time = datetime.now() # get time series for _clim dataset + shortName_clim = shortName + "_clim" daysinrange_clim = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1], bounding_polygon.bounds[3], bounding_polygon.bounds[0], bounding_polygon.bounds[2], - shortName, + shortName_clim, 0, 31535999, metrics_callback=metrics_record.record_metrics) + results_clim, _ = spark_driver(daysinrange_clim, + bounding_polygon, + shortName_clim, + self._tile_service_factory, + metrics_record.record_metrics, + spark_nparts=spark_nparts, + sc=self._sc) + clim_indexed_by_month = {datetime.utcfromtimestamp( + result['time']).month: result for result in results_clim} + if len(clim_indexed_by_month) < 12: + raise NexusProcessingException(reason="There are only " + + len(clim_indexed_by_month) + " months of climatology data for dataset " + + shortName + ". A full year of climatology data is required for computing deseasoned timeseries.") for result in results: - # aline _clim time series with original time series - month = datetime.utcfromtimestamp(result['time']).month - month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt, - shortName) - seasonal_mean = result['mean'] - month_mean - seasonal_min = result['min'] - month_min - seasonal_max = result['max'] - month_max - result['meanSeasonal'] = seasonal_mean - result['minSeasonal'] = seasonal_min - result['maxSeasonal'] = seasonal_max + + result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean'] + result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min'] + result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max'] self.log.info( "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
