(sdap-nexus) 01/02: Updates

rkk Fri, 05 Jul 2024 10:48:34 -0700

This is an automated email from the ASF dual-hosted git repository.

rkk pushed a commit to branch SDAP-521
in repository https://gitbox.apache.org/repos/asf/sdap-nexus.git


commit 7445246a85b01ee2e7119ff48c6038d08f4df168
Author: rileykk <[email protected]>
AuthorDate: Wed Jul 3 15:28:16 2024 -0700

    Updates
---
 docs/quickstart.rst                  |  12 +-
 docs/test.rst                        |   6 +-
 tests/conftest.py                    |   6 +-
 tests/download_data.sh               |  89 ++++++++++++++-
 tests/test_collections.yaml          |  24 +++-
 tests/{test_cdms.py => test_sdap.py} | 215 +++++++++++++++++++++++++++++------
 6 files changed, 300 insertions(+), 52 deletions(-)

diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index f393493..671731e 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -49,11 +49,11 @@ Pull the necessary Docker images from the `Apache SDAP 
repository <https://hub.d
 
   export CASSANDRA_VERSION=3.11.6-debian-10-r138
   export RMQ_VERSION=3.8.9-debian-10-r37
-  export COLLECTION_MANAGER_VERSION=1.2.0
-  export GRANULE_INGESTER_VERSION=1.2.0
-  export WEBAPP_VERSION=1.2.0
-  export SOLR_VERSION=1.2.0
-  export SOLR_CLOUD_INIT_VERSION=1.2.0
+  export COLLECTION_MANAGER_VERSION=1.3.0
+  export GRANULE_INGESTER_VERSION=1.3.0
+  export WEBAPP_VERSION=1.3.0
+  export SOLR_VERSION=1.3.0
+  export SOLR_CLOUD_INIT_VERSION=1.3.0
   export ZK_VERSION=3.5.5
 
   export JUPYTER_VERSION=1.0.0-rc2
@@ -314,7 +314,7 @@ The collection configuration is a ``.yml`` file that tells 
the collection manage
   cat << EOF >> ${CONFIG_DIR}/collectionConfig.yml
   collections:
     - id: AVHRR_OI_L4_GHRSST_NCEI
-      path: /data/granules/*.nc
+      path: /data/granules/*AVHRR_OI-GLOB-v02.0-fv02.0.nc
       priority: 1
       forward-processing-priority: 5
       projection: Grid
diff --git a/docs/test.rst b/docs/test.rst
index 75997ca..dcec503 100644
--- a/docs/test.rst
+++ b/docs/test.rst
@@ -45,8 +45,10 @@ If you have not started the Collection Manager, start it now:
 
   docker run --name collection-manager --network sdap-net -v 
${DATA_DIRECTORY}:/data/granules/ -v $(pwd):/home/ingester/config/ -e 
COLLECTIONS_PATH="/home/ingester/config/test_collections.yaml" -e 
HISTORY_URL="http://host.docker.internal:8983/" -e 
RABBITMQ_HOST="host.docker.internal:5672" -e RABBITMQ_USERNAME="user" -e 
RABBITMQ_PASSWORD="bitnami" -d 
${REPO}/sdap-collection-manager:${COLLECTION_MANAGER_VERSION}
 
-Refer to the :ref:`Quickstart Guide<quickstart>` to see how many files are 
enqueued for ingest, there should be 134 total.
-(This may appear to be less if you have ingesters running. We recommend not 
starting the ingesters until all data is queued)
+Refer to the :ref:`Quickstart Guide<quickstart>` to see how many files are 
enqueued for ingest, there should be 207 total.
+(This may appear to be less if you have ingesters running. We recommend not 
starting the ingesters until all data is queued.
+You may also see more if the Collection Manager was running during the data 
download. This is a known issue where the Collection
+Manager queues downloading files more than once as they're seen as modified.)
 
 Once the data is ready for ingest, start up the ingester(s) and wait for them 
to finish. After that, you can stop the Collection Manager,
 ingester and RabbitMQ containers and start the webapp container if it is not 
already running.
diff --git a/tests/conftest.py b/tests/conftest.py
index c2a7707..2bb4e42 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -19,9 +19,9 @@ import pytest
 def pytest_addoption(parser):
     parser.addoption("--skip-matchup", action="store_true",
                      help="Skip matchup_spark test. (Only for script testing 
purposes)")
-    parser.addoption('--matchup-fail-on-miscount', action='store_true',
-                     help='Fail matchup tests if they return an unexpected 
number of matches; '
-                          'otherwise issue a warning')
+    parser.addoption('--matchup-warn-on-miscount', action='store_false',
+                     help='Issue a warning for matchup tests if they return an 
unexpected number of matches; '
+                          'otherwise fail')
 
 
 def pytest_collection_modifyitems(config, items):
diff --git a/tests/download_data.sh b/tests/download_data.sh
index 877f1f8..6064ad3 100755
--- a/tests/download_data.sh
+++ b/tests/download_data.sh
@@ -64,8 +64,9 @@ setup_auth_wget() {
 }
 
 fetch_urls() {
+  echo "Downloading files for collection ${collection}"
+
   download_dir=${DATA_DIRECTORY}/$collection
-  echo mkdir -p $download_dir
   mkdir -p $download_dir
 
   if command -v curl >/dev/null 2>&1; then
@@ -77,7 +78,8 @@ fetch_urls() {
         # Strip everything after '?'
         stripped_query_params="${filename%%\?*}"
 
-        curl -f -b "$cookiejar" -c "$cookiejar" -L --netrc-file "$netrc" -g -o 
$download_dir/$stripped_query_params -- $line && echo || exit_with_error 
"Command failed with error. Please retrieve the data manually."
+        echo "Downloading ${line}"
+        curl -s -f -b "$cookiejar" -c "$cookiejar" -L --netrc-file "$netrc" -g 
-o $download_dir/$stripped_query_params -- $line || exit_with_error "Command 
failed with error. Please retrieve the data manually."
       done;
   elif command -v wget >/dev/null 2>&1; then
       # We can't use wget to poke provider server to get info whether or not 
URS was integrated without download at least one of the files.
@@ -93,7 +95,8 @@ fetch_urls() {
         # Strip everything after '?'
         stripped_query_params="${filename%%\?*}"
 
-        wget --load-cookies "$cookiejar" --save-cookies "$cookiejar" 
--output-document $download_dir/$stripped_query_params --keep-session-cookies 
-- $line && echo || exit_with_error "Command failed with error. Please retrieve 
the data manually."
+        echo "Downloading ${line}"
+        wget -q --load-cookies "$cookiejar" --save-cookies "$cookiejar" 
--output-document $download_dir/$stripped_query_params --keep-session-cookies 
-- $line && echo || exit_with_error "Command failed with error. Please retrieve 
the data manually."
       done;
   else
       exit_with_error "Error: Could not find a command-line downloader.  
Please install curl or wget"
@@ -194,11 +197,15 @@ 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/MUR25-JPL
 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/MUR25-JPL-L4-GLOB-v04.2/20180705090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
 EDSCEOF
 
-collection="ASCATB_L2_Coastal_test"
+collection="ASCATB-L2-Coastal_test"
 
 fetch_urls <<'EDSCEOF'
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180704_041200_metopb_30055_eps_o_coa_2401_ovw.l2.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180704_174500_metopb_30063_eps_o_coa_2401_ovw.l2.nc
 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180705_053300_metopb_30070_eps_o_coa_2401_ovw.l2.nc
 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180705_172400_metopb_30077_eps_o_coa_2401_ovw.l2.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180706_051200_metopb_30084_eps_o_coa_2401_ovw.l2.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180706_170300_metopb_30091_eps_o_coa_2401_ovw.l2.nc
 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180801_025100_metopb_30452_eps_o_coa_2401_ovw.l2.nc
 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180801_144200_metopb_30459_eps_o_coa_2401_ovw.l2.nc
 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180801_162400_metopb_30460_eps_o_coa_2401_ovw.l2.nc
@@ -253,3 +260,77 @@ 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/OISSS_L4_
 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/OISSS_L4_multimission_7day_v1/OISSS_L4_multimission_global_7d_v1.0_2018-08-03.nc
 
https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/OISSS_L4_multimission_7day_v1/OISSS_L4_multimission_global_7d_v1.0_2018-07-30.nc
 EDSCEOF
+
+collection="SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_test"
+
+fetch_urls <<'EDSCEOF'
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/273/SMAP_L3_SSS_20181004_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/272/SMAP_L3_SSS_20181003_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/271/SMAP_L3_SSS_20181002_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/270/SMAP_L3_SSS_20181001_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/269/SMAP_L3_SSS_20180930_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/268/SMAP_L3_SSS_20180929_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/267/SMAP_L3_SSS_20180928_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/266/SMAP_L3_SSS_20180927_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/265/SMAP_L3_SSS_20180926_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/264/SMAP_L3_SSS_20180925_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/263/SMAP_L3_SSS_20180924_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/262/SMAP_L3_SSS_20180923_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/261/SMAP_L3_SSS_20180922_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/260/SMAP_L3_SSS_20180921_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/259/SMAP_L3_SSS_20180920_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/258/SMAP_L3_SSS_20180919_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/257/SMAP_L3_SSS_20180918_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/256/SMAP_L3_SSS_20180917_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/255/SMAP_L3_SSS_20180916_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/254/SMAP_L3_SSS_20180915_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/253/SMAP_L3_SSS_20180914_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/252/SMAP_L3_SSS_20180913_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/251/SMAP_L3_SSS_20180912_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/250/SMAP_L3_SSS_20180911_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/249/SMAP_L3_SSS_20180910_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/248/SMAP_L3_SSS_20180909_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/247/SMAP_L3_SSS_20180908_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/246/SMAP_L3_SSS_20180907_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/245/SMAP_L3_SSS_20180906_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/244/SMAP_L3_SSS_20180905_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/243/SMAP_L3_SSS_20180904_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/242/SMAP_L3_SSS_20180903_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/241/SMAP_L3_SSS_20180902_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/240/SMAP_L3_SSS_20180901_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/239/SMAP_L3_SSS_20180831_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/238/SMAP_L3_SSS_20180830_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/237/SMAP_L3_SSS_20180829_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/236/SMAP_L3_SSS_20180828_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/235/SMAP_L3_SSS_20180827_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/234/SMAP_L3_SSS_20180826_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/233/SMAP_L3_SSS_20180825_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/232/SMAP_L3_SSS_20180824_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/231/SMAP_L3_SSS_20180823_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/230/SMAP_L3_SSS_20180822_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/229/SMAP_L3_SSS_20180821_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/228/SMAP_L3_SSS_20180820_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/227/SMAP_L3_SSS_20180819_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/226/SMAP_L3_SSS_20180818_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/225/SMAP_L3_SSS_20180817_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/224/SMAP_L3_SSS_20180816_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/223/SMAP_L3_SSS_20180815_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/222/SMAP_L3_SSS_20180814_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/221/SMAP_L3_SSS_20180813_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/220/SMAP_L3_SSS_20180812_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/219/SMAP_L3_SSS_20180811_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/218/SMAP_L3_SSS_20180810_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/217/SMAP_L3_SSS_20180809_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/216/SMAP_L3_SSS_20180808_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/215/SMAP_L3_SSS_20180807_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/214/SMAP_L3_SSS_20180806_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/213/SMAP_L3_SSS_20180805_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/212/SMAP_L3_SSS_20180804_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/211/SMAP_L3_SSS_20180803_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/210/SMAP_L3_SSS_20180802_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/209/SMAP_L3_SSS_20180801_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/208/SMAP_L3_SSS_20180731_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/207/SMAP_L3_SSS_20180730_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/206/SMAP_L3_SSS_20180729_8DAYS_V5.0.nc
+https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/205/SMAP_L3_SSS_20180728_8DAYS_V5.0.nc
+EDSCEOF
diff --git a/tests/test_collections.yaml b/tests/test_collections.yaml
index 0c21406..0331ebe 100644
--- a/tests/test_collections.yaml
+++ b/tests/test_collections.yaml
@@ -39,8 +39,8 @@ collections:
       variable: sss
     slices:
       time: 1
-      latitude: 30
-      longitude: 30
+      latitude: 100
+      longitude: 100
   - id: VIIRS_NPP-2018_Heatwave_test
     path: /data/granules/VIIRS_NPP-2018_Heatwave_test/*.nc
     priority: 1
@@ -52,4 +52,22 @@ collections:
       variable: sea_surface_temperature
     slices:
       ni: 30
-      nj: 30
\ No newline at end of file
+      nj: 30
+    preprocess:
+      - name: squeeze
+        dimensions:
+          - time
+  - id: SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_test
+    path: /data/granules/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_test/*.nc
+    priority: 1
+    forward-processing-priority: 1
+    projection: Grid
+    dimensionNames:
+      latitude: latitude
+      longitude: longitude
+      time: time
+      variable: smap_sss
+    slices:
+      time: 1
+      latitude: 100
+      longitude: 100
\ No newline at end of file
diff --git a/tests/test_cdms.py b/tests/test_sdap.py
similarity index 85%
rename from tests/test_cdms.py
rename to tests/test_sdap.py
index 0b17af3..1919c9e 100644
--- a/tests/test_cdms.py
+++ b/tests/test_sdap.py
@@ -20,12 +20,13 @@ import io
 import itertools
 import json
 import os
+import re
 import warnings
 from datetime import datetime
 from pathlib import Path
 from tempfile import NamedTemporaryFile as Temp
 from time import sleep
-from urllib.parse import urljoin
+from urllib.parse import urljoin, urlparse, urlunparse
 from zipfile import ZipFile
 
 import pandas as pd
@@ -88,7 +89,7 @@ def timeouts():
 
 @pytest.fixture()
 def fail_on_miscount(request):
-    return request.config.getoption('--matchup-fail-on-miscount', 
default=False)
+    return request.config.getoption('--matchup-warn-on-miscount', 
default=False)
 
 
 @pytest.fixture(scope='session')
@@ -164,7 +165,7 @@ def matchup_params():
     return {
         'gridded_to_gridded': {
             "primary": "MUR25-JPL-L4-GLOB-v04.2_test",
-            "secondary": "OISSS_L4_multimission_7day_v1_test",
+            "secondary": "SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_test",
             "startTime": "2018-08-01T00:00:00Z",
             "endTime": "2018-08-02T00:00:00Z",
             "b": "-100,20,-90,30",
@@ -219,8 +220,8 @@ def matchup_params():
             "platforms": "42"
         },
         'long': {  # TODO: Find something for this; it's copied atm
-            "primary": "VIIRS_NPP-2018_Heatwave",
-            "secondary": "ASCATB-L2-Coastal",
+            "primary": "VIIRS_NPP-2018_Heatwave_test",
+            "secondary": "ASCATB-L2-Coastal_test",
             "startTime": "2018-07-05T00:00:00Z",
             "endTime": "2018-07-05T23:59:59Z",
             "b": "-120,28,-118,30",
@@ -484,6 +485,13 @@ def check_count(count, expected, fail_on_mismatch):
         warnings.warn(f'Incorrect count: Expected {expected}, got {count}')
 
 
+def url_scheme(scheme, url):
+    if urlparse(url).scheme == scheme:
+        return url
+    else:
+        return urlunparse(tuple([scheme] + list(urlparse(url)[1:])))
+
+
 # Run the matchup query and return json output (and eid?)
 # Should be able to work if match_spark is synchronous or asynchronous
 def run_matchup(url, params, page_size=3500):
@@ -492,6 +500,8 @@ def run_matchup(url, params, page_size=3500):
 
     response = requests.get(url, params=params)
 
+    scheme = urlparse(url).scheme
+
     assert response.status_code == 200, 'Initial match_spark query failed'
     response_json = response.json()
 
@@ -503,6 +513,8 @@ def run_matchup(url, params, page_size=3500):
         start = datetime.utcnow()
         job_url = [link for link in response_json['links'] if link['rel'] == 
'self'][0]['href']
 
+        job_url = url_scheme(scheme, job_url)
+
         retries = 3
         timeouts = [2, 5, 10]
 
@@ -535,6 +547,8 @@ def run_matchup(url, params, page_size=3500):
                 link for link in response_json['links'] if 'STAC' in 
link['title']
             ][0]['href']
 
+            stac_url = url_scheme(scheme, stac_url)
+
             catalogue_response = requests.get(stac_url)
             assert catalogue_response.status_code == 200, 'Catalogue fetch 
failed'
 
@@ -544,13 +558,16 @@ def run_matchup(url, params, page_size=3500):
                 link for link in catalogue_response['links'] if 'JSON' in 
link['title']
             ][0]['href']
 
+            json_cat_url = url_scheme(scheme, json_cat_url)
+
             catalogue_response = requests.get(json_cat_url)
             assert catalogue_response.status_code == 200, 'Catalogue fetch 
failed'
 
             catalogue_response = catalogue_response.json()
 
             results_urls = [
-                link['href'] for link in catalogue_response['links'] if 
'output=JSON' in link['href']
+                url_scheme(scheme, link['href']) for link in
+                catalogue_response['links'] if 'output=JSON' in link['href']
                 # link['href'] for link in response_json['links'] if 
link['type'] == 'application/json'
             ]
 
@@ -563,7 +580,11 @@ def run_matchup(url, params, page_size=3500):
 
                     try:
                         response.raise_for_status()
-                        return response.json()
+                        result = response.json()
+
+                        assert result['count'] == len(result['data'])
+
+                        return result
                     except:
                         retries -= 1
                         sleep(retry_delay)
@@ -574,7 +595,7 @@ def run_matchup(url, params, page_size=3500):
             matchup_result = get_results(results_urls[0])
 
             for url in results_urls[1:]:
-                matchup_result['data'].append(get_results(url)['data'])
+                matchup_result['data'].extend(get_results(url)['data'])
 
             return matchup_result
 
@@ -584,7 +605,7 @@ def run_matchup(url, params, page_size=3500):
     ['match', 'expected'],
     list(zip(
         ['gridded_to_gridded', 'gridded_to_swath', 'swath_to_gridded', 
'swath_to_swath'],
-        [1110, 6, 21, 4027]
+        [1058, 6, 21, 4026]
     ))
 )
 def test_match_spark(host, start, fail_on_miscount, matchup_params, match, 
expected):
@@ -598,16 +619,15 @@ def test_match_spark(host, start, fail_on_miscount, 
matchup_params, match, expec
     try_save(f"test_matchup_spark_{match}", start, body)
     data = body['data']
 
-    assert body['count'] == len(data)
-    uniq_primaries(data, case=f"test_matchup_spark_{match}")
-    check_count(len(data), expected, fail_on_miscount)
-
     for match in data:
         verify_match_consistency(match, params, bounding_poly)
 
+    uniq_primaries(data, case=f"test_matchup_spark_{match}")
+    check_count(len(data), expected, fail_on_miscount)
+
 
 @pytest.mark.integration
-def test_matchup_spark_job_cancellation(host, start, matchup_params):
+def test_match_spark_job_cancellation(host, start, matchup_params):
     url = urljoin(host, 'match_spark')
 
     params = matchup_params['long']
@@ -622,13 +642,18 @@ def test_matchup_spark_job_cancellation(host, start, 
matchup_params):
     if not asynchronous:
         skip('Deployed SDAP version does not have asynchronous matchup')
     else:
-        sleep(25) # Time to allow spark workers to start working
+        sleep(1)  # Time to allow spark workers to start working
 
         if response_json['status'] != 'running':
             skip('Job finished before it could be cancelled')
         else:
             cancel_url = [link for link in response_json['links'] if 
link['rel'] == 'cancel'][0]['href']
 
+            cancel_url = url_scheme(
+                urlparse(url).scheme,
+                cancel_url
+            )
+
             cancel_response = requests.get(cancel_url)
             assert cancel_response.status_code == 200, 'Cancellation query 
failed'
 
@@ -939,6 +964,52 @@ def test_cdmsresults_netcdf(host, eid, start):
     warnings.filterwarnings('default')
 
 
+@pytest.mark.integration
+def test_timeseries_spark(host, start):
+    url = urljoin(host, 'timeSeriesSpark')
+
+    params = {
+        "ds": "MUR25-JPL-L4-GLOB-v04.2_test",
+        "b": "-135,-10,-80,10",
+        "startTime": "2018-07-05T00:00:00Z",
+        "endTime": "2018-09-30T23:59:59Z",
+    }
+
+    response = requests.get(url, params=params)
+
+    assert response.status_code == 200
+
+    data = response.json()
+    try_save('test_timeseries_spark', start, data)
+
+    assert len(data['data']) == len(pd.date_range(params['startTime'], 
params['endTime'], freq='D'))
+
+    epoch = datetime(1970, 1, 1, tzinfo=UTC)
+
+    start = (datetime.strptime(params['startTime'], 
'%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=UTC) - epoch).total_seconds()
+    end = (datetime.strptime(params['endTime'], 
'%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=UTC) - epoch).total_seconds()
+
+    for p in data['data']:
+        assert start <= p[0]['time'] <= end
+
+
+@pytest.mark.integration
+def test_list(host, start):
+    url = urljoin(host, 'list')
+
+    response = requests.get(url)
+
+    assert response.status_code == 200
+
+    body = response.json()
+    try_save("test_list", start, body)
+
+    assert isinstance(body, list)
+
+    if len(body) == 0:
+        warnings.warn('/list returned no datasets. This could be correct if 
SDAP has no data ingested, otherwise '
+                      'this should be considered a failure')
+
 @pytest.mark.integration
 def test_cdmslist(host, start):
     url = urljoin(host, 'cdmslist')
@@ -955,8 +1026,13 @@ def test_cdmslist(host, start):
     num_satellite = len(data['satellite'])
     num_insitu = len(data['insitu'])
 
-    assert num_insitu > 0
-    assert num_satellite > 0
+    if num_satellite == 0:
+        warnings.warn('/cdmslist returned no satellite datasets. This could be 
correct if SDAP has no data ingested, '
+                      'otherwise this should be considered a failure')
+
+    if num_insitu == 0:
+        warnings.warn('/cdmslist returned no insitu datasets. This could be 
correct if SDAP has no insitu data '
+                      'ingested, otherwise this should be considered a 
failure')
 
 
 @pytest.mark.integration
@@ -964,7 +1040,7 @@ def test_cdmssubset_L4(host, start):
     url = urljoin(host, 'cdmssubset')
 
     params = {
-        "dataset": "MUR25-JPL-L4-GLOB-v04.2",
+        "dataset": "MUR25-JPL-L4-GLOB-v04.2_test",
         "parameter": "sst",
         "startTime": "2018-09-24T00:00:00Z",
         "endTime": "2018-09-30T00:00:00Z",
@@ -985,7 +1061,7 @@ def test_cdmssubset_L4(host, start):
     with ZipFile(response_buf) as data:
         namelist = data.namelist()
 
-        assert namelist == ['MUR25-JPL-L4-GLOB-v04.2.csv']
+        assert namelist == ['MUR25-JPL-L4-GLOB-v04.2_test.csv']
 
         csv_buf = io.StringIO(data.read(namelist[0]).decode('utf-8'))
         csv_data = pd.read_csv(csv_buf)
@@ -997,7 +1073,7 @@ def test_cdmssubset_L4(host, start):
     for i in range(0, len(csv_data)):
         validate_row_bounds(csv_data.iloc[i])
 
-    params['dataset'] = 'OISSS_L4_multimission_7day_v1'
+    params['dataset'] = 'OISSS_L4_multimission_7day_v1_test'
 
     response = requests.get(url, params=params)
 
@@ -1010,7 +1086,7 @@ def test_cdmssubset_L4(host, start):
     with ZipFile(response_buf) as data:
         namelist = data.namelist()
 
-        assert namelist == ['OISSS_L4_multimission_7day_v1.csv']
+        assert namelist == ['OISSS_L4_multimission_7day_v1_test.csv']
 
         csv_buf = io.StringIO(data.read(namelist[0]).decode('utf-8'))
         csv_data = pd.read_csv(csv_buf)
@@ -1024,7 +1100,7 @@ def test_cdmssubset_L2(host, start):
     url = urljoin(host, 'cdmssubset')
 
     params = {
-        "dataset": "ASCATB-L2-Coastal",
+        "dataset": "ASCATB-L2-Coastal_test",
         "startTime": "2018-09-24T00:00:00Z",
         "endTime": "2018-09-30T00:00:00Z",
         "b": "160,-30,180,-25",
@@ -1044,7 +1120,7 @@ def test_cdmssubset_L2(host, start):
     with ZipFile(response_buf) as data:
         namelist = data.namelist()
 
-        assert namelist == ['ASCATB-L2-Coastal.csv']
+        assert namelist == ['ASCATB-L2-Coastal_test.csv']
 
         csv_buf = io.StringIO(data.read(namelist[0]).decode('utf-8'))
         csv_data = pd.read_csv(csv_buf)
@@ -1057,17 +1133,6 @@ def test_cdmssubset_L2(host, start):
         validate_row_bounds(csv_data.iloc[i])
 
 
-@pytest.mark.integration
-def test_insitu_schema(start, timeouts):
-    url = 'https://doms.jpl.nasa.gov/insitu/1.0/cdms_schema'
-
-    response = requests.get(url, timeout=timeouts)
-
-    assert response.status_code == 200
-
-    assert len(response.json()) > 0
-
-
 @pytest.mark.integration
 def test_swaggerui_sdap(host):
     url = urljoin(host, 'apidocs/')
@@ -1111,3 +1176,85 @@ def test_swaggerui_sdap(host):
             raise ValueError("Could not verify documentation yaml file, 
assumed value also failed")
 
 
+@pytest.mark.integration
+def test_version(host, start):
+    url = urljoin(host, 'version')
+
+    response = requests.get(url)
+
+    assert response.status_code == 200
+    assert re.match(r'^\d+\.\d+\.\d+(-.+)?$', response.text)
+
+
+@pytest.mark.integration
+def test_capabilities(host, start):
+    url = urljoin(host, 'capabilities')
+
+    response = requests.get(url)
+
+    assert response.status_code == 200
+
+    capabilities = response.json()
+
+    try_save('test_capabilities', start, capabilities)
+
+    assert len(capabilities) > 0
+
+    for capability in capabilities:
+        assert all([k in capability for k in ['name', 'path', 'description', 
'parameters']])
+        assert all([isinstance(k, str) for k in ['name', 'path', 
'description']])
+
+        assert isinstance(capability['parameters'], (dict, list))
+
+        for param in capability['parameters']:
+            if isinstance(capability['parameters'], dict):
+                param = capability['parameters'][param]
+
+            assert isinstance(param, dict)
+            assert all([k in param and isinstance(param[k], str) for k in 
['name', 'type', 'description']])
+
+
+@pytest.mark.integration
+def test_endpoints(host, start):
+    url = urljoin(host, 'capabilities')
+
+    response = requests.get(url)
+
+    if response.status_code != 200:
+        skip('Could not get endpoints list. Expected if test_capabilities has 
failed')
+
+    capabilities = response.json()
+
+    endpoints = [c['path'] for c in capabilities]
+
+    non_existent_endpoints = []
+
+    for endpoint in endpoints:
+        status = requests.head(urljoin(host, endpoint)).status_code
+
+        if status == 404:
+            # Strip special characters because some endpoints have 
wildcards/regex characters
+            # This may not work forever though
+            stripped_endpoint = re.sub(r'[^a-zA-Z0-9/_-]', '', endpoint)
+
+            status = requests.head(urljoin(host, 
stripped_endpoint)).status_code
+
+            if status == 404:
+                non_existent_endpoints.append(([endpoint, stripped_endpoint], 
status))
+
+    assert len(non_existent_endpoints) == 0, non_existent_endpoints
+
+
+@pytest.mark.integration
+def test_heartbeat(host, start):
+    url = urljoin(host, 'heartbeat')
+
+    response = requests.get(url)
+
+    assert response.status_code == 200
+    heartbeat = response.json()
+
+    assert isinstance(heartbeat, dict)
+    assert all(heartbeat.values())
+
+

(sdap-nexus) 01/02: Updates

Reply via email to