This is an automated email from the ASF dual-hosted git repository.

rkk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


The following commit(s) were added to refs/heads/master by this push:
     new 2fea55a  SDAP-475 Algorithm bug fixes (#259)
2fea55a is described below

commit 2fea55a2e9e0ccc2eaed14bc359038c0f477f92e
Author: Kevin <[email protected]>
AuthorDate: Mon Aug 21 13:02:56 2023 -0700

    SDAP-475 Algorithm bug fixes (#259)
    
    * Bug fix for less than 12 months of climatology
    
    * Bug fix for newer numpy versions breaking array creation when underlying sequence contains elements of different sizes
    
    * Update CHANGELOG.md
    
    ---------
    
    Co-authored-by: Riley Kuttruff <[email protected]>
---
 CHANGELOG.md                                            |  1 +
 analysis/webservice/algorithms_spark/TimeAvgMapSpark.py |  4 ++--
 analysis/webservice/algorithms_spark/TimeSeriesSpark.py | 15 +++++----------
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14b2576..61db7a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - SDAP-465: Removed `climatology` directory. 
 ### Fixed
 - SDAP-474: Fixed bug in CSV attributes where secondary dataset would be rendered as comma separated characters
+- SDAP-475: Bug fixes for `/timeSeriesSpark` and `/timeAvgMapSpark`
 ### Security
 
 ## [1.1.0] - 2023-04-26
diff --git a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
index 45130ca..750ba59 100644
--- a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
@@ -169,9 +169,9 @@ class TimeAvgMapNexusSparkHandlerImpl(NexusCalcSparkHandler):
                                                                self._maxLonCent))
 
         # Create array of tuples to pass to Spark map function
-        nexus_tiles_spark = [[self._find_tile_bounds(t),
+        nexus_tiles_spark = np.array([[self._find_tile_bounds(t),
                               self._startTime, self._endTime,
-                              self._ds] for t in nexus_tiles]
+                              self._ds] for t in nexus_tiles], dtype='object')
 
         # Remove empty tiles (should have bounds set to None)
         bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index faeaa0b..90ae14d 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -226,19 +226,14 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
                                                spark_nparts=spark_nparts,
                                                sc=self._sc)
                 clim_indexed_by_month = {datetime.utcfromtimestamp(result['time']).month: result for result in results_clim}
-                if len(clim_indexed_by_month) < 12:
-                    raise NexusProcessingException(reason="There are only " +
-                                                   len(clim_indexed_by_month) + " months of climatology data for dataset " + 
-                                                   shortName + ". A full year of climatology data is required for computing deseasoned timeseries.")
-
+                
                 for result in results:
                     month = datetime.utcfromtimestamp(result['time']).month
 
-                    result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
-                    result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
-                    result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
-                self.log.info(
-                    "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
+                    result['meanSeasonal'] = result['mean'] - clim_indexed_by_month.get(month, result)['mean']
+                    result['minSeasonal'] = result['min'] - clim_indexed_by_month.get(month, result)['min']
+                    result['maxSeasonal'] = result['max'] - clim_indexed_by_month.get(month, result)['max']
+                self.log.info("Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
 
             the_time = datetime.now()
             filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)

Reply via email to