This is an automated email from the ASF dual-hosted git repository. rkk pushed a commit to branch SDAP-521 in repository https://gitbox.apache.org/repos/asf/sdap-nexus.git
commit 7445246a85b01ee2e7119ff48c6038d08f4df168 Author: rileykk <[email protected]> AuthorDate: Wed Jul 3 15:28:16 2024 -0700 Updates --- docs/quickstart.rst | 12 +- docs/test.rst | 6 +- tests/conftest.py | 6 +- tests/download_data.sh | 89 ++++++++++++++- tests/test_collections.yaml | 24 +++- tests/{test_cdms.py => test_sdap.py} | 215 +++++++++++++++++++++++++++++------ 6 files changed, 300 insertions(+), 52 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index f393493..671731e 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -49,11 +49,11 @@ Pull the necessary Docker images from the `Apache SDAP repository <https://hub.d export CASSANDRA_VERSION=3.11.6-debian-10-r138 export RMQ_VERSION=3.8.9-debian-10-r37 - export COLLECTION_MANAGER_VERSION=1.2.0 - export GRANULE_INGESTER_VERSION=1.2.0 - export WEBAPP_VERSION=1.2.0 - export SOLR_VERSION=1.2.0 - export SOLR_CLOUD_INIT_VERSION=1.2.0 + export COLLECTION_MANAGER_VERSION=1.3.0 + export GRANULE_INGESTER_VERSION=1.3.0 + export WEBAPP_VERSION=1.3.0 + export SOLR_VERSION=1.3.0 + export SOLR_CLOUD_INIT_VERSION=1.3.0 export ZK_VERSION=3.5.5 export JUPYTER_VERSION=1.0.0-rc2 @@ -314,7 +314,7 @@ The collection configuration is a ``.yml`` file that tells the collection manage cat << EOF >> ${CONFIG_DIR}/collectionConfig.yml collections: - id: AVHRR_OI_L4_GHRSST_NCEI - path: /data/granules/*.nc + path: /data/granules/*AVHRR_OI-GLOB-v02.0-fv02.0.nc priority: 1 forward-processing-priority: 5 projection: Grid diff --git a/docs/test.rst b/docs/test.rst index 75997ca..dcec503 100644 --- a/docs/test.rst +++ b/docs/test.rst @@ -45,8 +45,10 @@ If you have not started the Collection Manager, start it now: docker run --name collection-manager --network sdap-net -v ${DATA_DIRECTORY}:/data/granules/ -v $(pwd):/home/ingester/config/ -e COLLECTIONS_PATH="/home/ingester/config/test_collections.yaml" -e HISTORY_URL="http://host.docker.internal:8983/" -e RABBITMQ_HOST="host.docker.internal:5672" -e RABBITMQ_USERNAME="user" -e RABBITMQ_PASSWORD="bitnami" -d ${REPO}/sdap-collection-manager:${COLLECTION_MANAGER_VERSION} -Refer to the :ref:`Quickstart Guide<quickstart>` to see how many files are enqueued for ingest, there should be 134 total. -(This may appear to be less if you have ingesters running. We recommend not starting the ingesters until all data is queued) +Refer to the :ref:`Quickstart Guide<quickstart>` to see how many files are enqueued for ingest, there should be 207 total. +(This may appear to be less if you have ingesters running. We recommend not starting the ingesters until all data is queued. +You may also see more if the Collection Manager was running during the data download. This is a known issue where the Collection +Manager queues downloading files more than once as they're seen as modified.) Once the data is ready for ingest, start up the ingester(s) and wait for them to finish. After that, you can stop the Collection Manager, ingester and RabbitMQ containers and start the webapp container if it is not already running. diff --git a/tests/conftest.py b/tests/conftest.py index c2a7707..2bb4e42 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,9 +19,9 @@ import pytest def pytest_addoption(parser): parser.addoption("--skip-matchup", action="store_true", help="Skip matchup_spark test. (Only for script testing purposes)") - parser.addoption('--matchup-fail-on-miscount', action='store_true', - help='Fail matchup tests if they return an unexpected number of matches; ' - 'otherwise issue a warning') + parser.addoption('--matchup-warn-on-miscount', action='store_false', + help='Issue a warning for matchup tests if they return an unexpected number of matches; ' + 'otherwise fail') def pytest_collection_modifyitems(config, items): diff --git a/tests/download_data.sh b/tests/download_data.sh index 877f1f8..6064ad3 100755 --- a/tests/download_data.sh +++ b/tests/download_data.sh @@ -64,8 +64,9 @@ setup_auth_wget() { } fetch_urls() { + echo "Downloading files for collection ${collection}" + download_dir=${DATA_DIRECTORY}/$collection - echo mkdir -p $download_dir mkdir -p $download_dir if command -v curl >/dev/null 2>&1; then @@ -77,7 +78,8 @@ fetch_urls() { # Strip everything after '?' stripped_query_params="${filename%%\?*}" - curl -f -b "$cookiejar" -c "$cookiejar" -L --netrc-file "$netrc" -g -o $download_dir/$stripped_query_params -- $line && echo || exit_with_error "Command failed with error. Please retrieve the data manually." + echo "Downloading ${line}" + curl -s -f -b "$cookiejar" -c "$cookiejar" -L --netrc-file "$netrc" -g -o $download_dir/$stripped_query_params -- $line || exit_with_error "Command failed with error. Please retrieve the data manually." done; elif command -v wget >/dev/null 2>&1; then # We can't use wget to poke provider server to get info whether or not URS was integrated without download at least one of the files. @@ -93,7 +95,8 @@ fetch_urls() { # Strip everything after '?' stripped_query_params="${filename%%\?*}" - wget --load-cookies "$cookiejar" --save-cookies "$cookiejar" --output-document $download_dir/$stripped_query_params --keep-session-cookies -- $line && echo || exit_with_error "Command failed with error. Please retrieve the data manually." + echo "Downloading ${line}" + wget -q --load-cookies "$cookiejar" --save-cookies "$cookiejar" --output-document $download_dir/$stripped_query_params --keep-session-cookies -- $line && echo || exit_with_error "Command failed with error. Please retrieve the data manually." done; else exit_with_error "Error: Could not find a command-line downloader. Please install curl or wget" @@ -194,11 +197,15 @@ https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/MUR25-JPL https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/MUR25-JPL-L4-GLOB-v04.2/20180705090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc EDSCEOF -collection="ASCATB_L2_Coastal_test" +collection="ASCATB-L2-Coastal_test" fetch_urls <<'EDSCEOF' +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180704_041200_metopb_30055_eps_o_coa_2401_ovw.l2.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180704_174500_metopb_30063_eps_o_coa_2401_ovw.l2.nc https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180705_053300_metopb_30070_eps_o_coa_2401_ovw.l2.nc https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180705_172400_metopb_30077_eps_o_coa_2401_ovw.l2.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180706_051200_metopb_30084_eps_o_coa_2401_ovw.l2.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180706_170300_metopb_30091_eps_o_coa_2401_ovw.l2.nc https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180801_025100_metopb_30452_eps_o_coa_2401_ovw.l2.nc https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180801_144200_metopb_30459_eps_o_coa_2401_ovw.l2.nc https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ASCATB-L2-Coastal/ascat_20180801_162400_metopb_30460_eps_o_coa_2401_ovw.l2.nc @@ -253,3 +260,77 @@ https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/OISSS_L4_ https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/OISSS_L4_multimission_7day_v1/OISSS_L4_multimission_global_7d_v1.0_2018-08-03.nc https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/OISSS_L4_multimission_7day_v1/OISSS_L4_multimission_global_7d_v1.0_2018-07-30.nc EDSCEOF + +collection="SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_test" + +fetch_urls <<'EDSCEOF' +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/273/SMAP_L3_SSS_20181004_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/272/SMAP_L3_SSS_20181003_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/271/SMAP_L3_SSS_20181002_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/270/SMAP_L3_SSS_20181001_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/269/SMAP_L3_SSS_20180930_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/268/SMAP_L3_SSS_20180929_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/267/SMAP_L3_SSS_20180928_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/266/SMAP_L3_SSS_20180927_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/265/SMAP_L3_SSS_20180926_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/264/SMAP_L3_SSS_20180925_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/263/SMAP_L3_SSS_20180924_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/262/SMAP_L3_SSS_20180923_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/261/SMAP_L3_SSS_20180922_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/260/SMAP_L3_SSS_20180921_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/259/SMAP_L3_SSS_20180920_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/258/SMAP_L3_SSS_20180919_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/257/SMAP_L3_SSS_20180918_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/256/SMAP_L3_SSS_20180917_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/255/SMAP_L3_SSS_20180916_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/254/SMAP_L3_SSS_20180915_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/253/SMAP_L3_SSS_20180914_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/252/SMAP_L3_SSS_20180913_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/251/SMAP_L3_SSS_20180912_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/250/SMAP_L3_SSS_20180911_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/249/SMAP_L3_SSS_20180910_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/248/SMAP_L3_SSS_20180909_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/247/SMAP_L3_SSS_20180908_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/246/SMAP_L3_SSS_20180907_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/245/SMAP_L3_SSS_20180906_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/244/SMAP_L3_SSS_20180905_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/243/SMAP_L3_SSS_20180904_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/242/SMAP_L3_SSS_20180903_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/241/SMAP_L3_SSS_20180902_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/240/SMAP_L3_SSS_20180901_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/239/SMAP_L3_SSS_20180831_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/238/SMAP_L3_SSS_20180830_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/237/SMAP_L3_SSS_20180829_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/236/SMAP_L3_SSS_20180828_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/235/SMAP_L3_SSS_20180827_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/234/SMAP_L3_SSS_20180826_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/233/SMAP_L3_SSS_20180825_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/232/SMAP_L3_SSS_20180824_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/231/SMAP_L3_SSS_20180823_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/230/SMAP_L3_SSS_20180822_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/229/SMAP_L3_SSS_20180821_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/228/SMAP_L3_SSS_20180820_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/227/SMAP_L3_SSS_20180819_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/226/SMAP_L3_SSS_20180818_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/225/SMAP_L3_SSS_20180817_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/224/SMAP_L3_SSS_20180816_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/223/SMAP_L3_SSS_20180815_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/222/SMAP_L3_SSS_20180814_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/221/SMAP_L3_SSS_20180813_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/220/SMAP_L3_SSS_20180812_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/219/SMAP_L3_SSS_20180811_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/218/SMAP_L3_SSS_20180810_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/217/SMAP_L3_SSS_20180809_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/216/SMAP_L3_SSS_20180808_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/215/SMAP_L3_SSS_20180807_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/214/SMAP_L3_SSS_20180806_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/213/SMAP_L3_SSS_20180805_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/212/SMAP_L3_SSS_20180804_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/211/SMAP_L3_SSS_20180803_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/210/SMAP_L3_SSS_20180802_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/209/SMAP_L3_SSS_20180801_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/208/SMAP_L3_SSS_20180731_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/207/SMAP_L3_SSS_20180730_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/206/SMAP_L3_SSS_20180729_8DAYS_V5.0.nc +https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5/2018/205/SMAP_L3_SSS_20180728_8DAYS_V5.0.nc +EDSCEOF diff --git a/tests/test_collections.yaml b/tests/test_collections.yaml index 0c21406..0331ebe 100644 --- a/tests/test_collections.yaml +++ b/tests/test_collections.yaml @@ -39,8 +39,8 @@ collections: variable: sss slices: time: 1 - latitude: 30 - longitude: 30 + latitude: 100 + longitude: 100 - id: VIIRS_NPP-2018_Heatwave_test path: /data/granules/VIIRS_NPP-2018_Heatwave_test/*.nc priority: 1 @@ -52,4 +52,22 @@ collections: variable: sea_surface_temperature slices: ni: 30 - nj: 30 \ No newline at end of file + nj: 30 + preprocess: + - name: squeeze + dimensions: + - time + - id: SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_test + path: /data/granules/SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_test/*.nc + priority: 1 + forward-processing-priority: 1 + projection: Grid + dimensionNames: + latitude: latitude + longitude: longitude + time: time + variable: smap_sss + slices: + time: 1 + latitude: 100 + longitude: 100 \ No newline at end of file diff --git a/tests/test_cdms.py b/tests/test_sdap.py similarity index 85% rename from tests/test_cdms.py rename to tests/test_sdap.py index 0b17af3..1919c9e 100644 --- a/tests/test_cdms.py +++ b/tests/test_sdap.py @@ -20,12 +20,13 @@ import io import itertools import json import os +import re import warnings from datetime import datetime from pathlib import Path from tempfile import NamedTemporaryFile as Temp from time import sleep -from urllib.parse import urljoin +from urllib.parse import urljoin, urlparse, urlunparse from zipfile import ZipFile import pandas as pd @@ -88,7 +89,7 @@ def timeouts(): @pytest.fixture() def fail_on_miscount(request): - return request.config.getoption('--matchup-fail-on-miscount', default=False) + return request.config.getoption('--matchup-warn-on-miscount', default=False) @pytest.fixture(scope='session') @@ -164,7 +165,7 @@ def matchup_params(): return { 'gridded_to_gridded': { "primary": "MUR25-JPL-L4-GLOB-v04.2_test", - "secondary": "OISSS_L4_multimission_7day_v1_test", + "secondary": "SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_test", "startTime": "2018-08-01T00:00:00Z", "endTime": "2018-08-02T00:00:00Z", "b": "-100,20,-90,30", @@ -219,8 +220,8 @@ def matchup_params(): "platforms": "42" }, 'long': { # TODO: Find something for this; it's copied atm - "primary": "VIIRS_NPP-2018_Heatwave", - "secondary": "ASCATB-L2-Coastal", + "primary": "VIIRS_NPP-2018_Heatwave_test", + "secondary": "ASCATB-L2-Coastal_test", "startTime": "2018-07-05T00:00:00Z", "endTime": "2018-07-05T23:59:59Z", "b": "-120,28,-118,30", @@ -484,6 +485,13 @@ def check_count(count, expected, fail_on_mismatch): warnings.warn(f'Incorrect count: Expected {expected}, got {count}') +def url_scheme(scheme, url): + if urlparse(url).scheme == scheme: + return url + else: + return urlunparse(tuple([scheme] + list(urlparse(url)[1:]))) + + # Run the matchup query and return json output (and eid?) # Should be able to work if match_spark is synchronous or asynchronous def run_matchup(url, params, page_size=3500): @@ -492,6 +500,8 @@ def run_matchup(url, params, page_size=3500): response = requests.get(url, params=params) + scheme = urlparse(url).scheme + assert response.status_code == 200, 'Initial match_spark query failed' response_json = response.json() @@ -503,6 +513,8 @@ def run_matchup(url, params, page_size=3500): start = datetime.utcnow() job_url = [link for link in response_json['links'] if link['rel'] == 'self'][0]['href'] + job_url = url_scheme(scheme, job_url) + retries = 3 timeouts = [2, 5, 10] @@ -535,6 +547,8 @@ def run_matchup(url, params, page_size=3500): link for link in response_json['links'] if 'STAC' in link['title'] ][0]['href'] + stac_url = url_scheme(scheme, stac_url) + catalogue_response = requests.get(stac_url) assert catalogue_response.status_code == 200, 'Catalogue fetch failed' @@ -544,13 +558,16 @@ def run_matchup(url, params, page_size=3500): link for link in catalogue_response['links'] if 'JSON' in link['title'] ][0]['href'] + json_cat_url = url_scheme(scheme, json_cat_url) + catalogue_response = requests.get(json_cat_url) assert catalogue_response.status_code == 200, 'Catalogue fetch failed' catalogue_response = catalogue_response.json() results_urls = [ - link['href'] for link in catalogue_response['links'] if 'output=JSON' in link['href'] + url_scheme(scheme, link['href']) for link in + catalogue_response['links'] if 'output=JSON' in link['href'] # link['href'] for link in response_json['links'] if link['type'] == 'application/json' ] @@ -563,7 +580,11 @@ def run_matchup(url, params, page_size=3500): try: response.raise_for_status() - return response.json() + result = response.json() + + assert result['count'] == len(result['data']) + + return result except: retries -= 1 sleep(retry_delay) @@ -574,7 +595,7 @@ def run_matchup(url, params, page_size=3500): matchup_result = get_results(results_urls[0]) for url in results_urls[1:]: - matchup_result['data'].append(get_results(url)['data']) + matchup_result['data'].extend(get_results(url)['data']) return matchup_result @@ -584,7 +605,7 @@ def run_matchup(url, params, page_size=3500): ['match', 'expected'], list(zip( ['gridded_to_gridded', 'gridded_to_swath', 'swath_to_gridded', 'swath_to_swath'], - [1110, 6, 21, 4027] + [1058, 6, 21, 4026] )) ) def test_match_spark(host, start, fail_on_miscount, matchup_params, match, expected): @@ -598,16 +619,15 @@ def test_match_spark(host, start, fail_on_miscount, matchup_params, match, expec try_save(f"test_matchup_spark_{match}", start, body) data = body['data'] - assert body['count'] == len(data) - uniq_primaries(data, case=f"test_matchup_spark_{match}") - check_count(len(data), expected, fail_on_miscount) - for match in data: verify_match_consistency(match, params, bounding_poly) + uniq_primaries(data, case=f"test_matchup_spark_{match}") + check_count(len(data), expected, fail_on_miscount) + @pytest.mark.integration -def test_matchup_spark_job_cancellation(host, start, matchup_params): +def test_match_spark_job_cancellation(host, start, matchup_params): url = urljoin(host, 'match_spark') params = matchup_params['long'] @@ -622,13 +642,18 @@ def test_matchup_spark_job_cancellation(host, start, matchup_params): if not asynchronous: skip('Deployed SDAP version does not have asynchronous matchup') else: - sleep(25) # Time to allow spark workers to start working + sleep(1) # Time to allow spark workers to start working if response_json['status'] != 'running': skip('Job finished before it could be cancelled') else: cancel_url = [link for link in response_json['links'] if link['rel'] == 'cancel'][0]['href'] + cancel_url = url_scheme( + urlparse(url).scheme, + cancel_url + ) + cancel_response = requests.get(cancel_url) assert cancel_response.status_code == 200, 'Cancellation query failed' @@ -939,6 +964,52 @@ def test_cdmsresults_netcdf(host, eid, start): warnings.filterwarnings('default') [email protected] +def test_timeseries_spark(host, start): + url = urljoin(host, 'timeSeriesSpark') + + params = { + "ds": "MUR25-JPL-L4-GLOB-v04.2_test", + "b": "-135,-10,-80,10", + "startTime": "2018-07-05T00:00:00Z", + "endTime": "2018-09-30T23:59:59Z", + } + + response = requests.get(url, params=params) + + assert response.status_code == 200 + + data = response.json() + try_save('test_timeseries_spark', start, data) + + assert len(data['data']) == len(pd.date_range(params['startTime'], params['endTime'], freq='D')) + + epoch = datetime(1970, 1, 1, tzinfo=UTC) + + start = (datetime.strptime(params['startTime'], '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=UTC) - epoch).total_seconds() + end = (datetime.strptime(params['endTime'], '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=UTC) - epoch).total_seconds() + + for p in data['data']: + assert start <= p[0]['time'] <= end + + [email protected] +def test_list(host, start): + url = urljoin(host, 'list') + + response = requests.get(url) + + assert response.status_code == 200 + + body = response.json() + try_save("test_list", start, body) + + assert isinstance(body, list) + + if len(body) == 0: + warnings.warn('/list returned no datasets. This could be correct if SDAP has no data ingested, otherwise ' + 'this should be considered a failure') + @pytest.mark.integration def test_cdmslist(host, start): url = urljoin(host, 'cdmslist') @@ -955,8 +1026,13 @@ def test_cdmslist(host, start): num_satellite = len(data['satellite']) num_insitu = len(data['insitu']) - assert num_insitu > 0 - assert num_satellite > 0 + if num_satellite == 0: + warnings.warn('/cdmslist returned no satellite datasets. This could be correct if SDAP has no data ingested, ' + 'otherwise this should be considered a failure') + + if num_insitu == 0: + warnings.warn('/cdmslist returned no insitu datasets. This could be correct if SDAP has no insitu data ' + 'ingested, otherwise this should be considered a failure') @pytest.mark.integration @@ -964,7 +1040,7 @@ def test_cdmssubset_L4(host, start): url = urljoin(host, 'cdmssubset') params = { - "dataset": "MUR25-JPL-L4-GLOB-v04.2", + "dataset": "MUR25-JPL-L4-GLOB-v04.2_test", "parameter": "sst", "startTime": "2018-09-24T00:00:00Z", "endTime": "2018-09-30T00:00:00Z", @@ -985,7 +1061,7 @@ def test_cdmssubset_L4(host, start): with ZipFile(response_buf) as data: namelist = data.namelist() - assert namelist == ['MUR25-JPL-L4-GLOB-v04.2.csv'] + assert namelist == ['MUR25-JPL-L4-GLOB-v04.2_test.csv'] csv_buf = io.StringIO(data.read(namelist[0]).decode('utf-8')) csv_data = pd.read_csv(csv_buf) @@ -997,7 +1073,7 @@ def test_cdmssubset_L4(host, start): for i in range(0, len(csv_data)): validate_row_bounds(csv_data.iloc[i]) - params['dataset'] = 'OISSS_L4_multimission_7day_v1' + params['dataset'] = 'OISSS_L4_multimission_7day_v1_test' response = requests.get(url, params=params) @@ -1010,7 +1086,7 @@ def test_cdmssubset_L4(host, start): with ZipFile(response_buf) as data: namelist = data.namelist() - assert namelist == ['OISSS_L4_multimission_7day_v1.csv'] + assert namelist == ['OISSS_L4_multimission_7day_v1_test.csv'] csv_buf = io.StringIO(data.read(namelist[0]).decode('utf-8')) csv_data = pd.read_csv(csv_buf) @@ -1024,7 +1100,7 @@ def test_cdmssubset_L2(host, start): url = urljoin(host, 'cdmssubset') params = { - "dataset": "ASCATB-L2-Coastal", + "dataset": "ASCATB-L2-Coastal_test", "startTime": "2018-09-24T00:00:00Z", "endTime": "2018-09-30T00:00:00Z", "b": "160,-30,180,-25", @@ -1044,7 +1120,7 @@ def test_cdmssubset_L2(host, start): with ZipFile(response_buf) as data: namelist = data.namelist() - assert namelist == ['ASCATB-L2-Coastal.csv'] + assert namelist == ['ASCATB-L2-Coastal_test.csv'] csv_buf = io.StringIO(data.read(namelist[0]).decode('utf-8')) csv_data = pd.read_csv(csv_buf) @@ -1057,17 +1133,6 @@ def test_cdmssubset_L2(host, start): validate_row_bounds(csv_data.iloc[i]) [email protected] -def test_insitu_schema(start, timeouts): - url = 'https://doms.jpl.nasa.gov/insitu/1.0/cdms_schema' - - response = requests.get(url, timeout=timeouts) - - assert response.status_code == 200 - - assert len(response.json()) > 0 - - @pytest.mark.integration def test_swaggerui_sdap(host): url = urljoin(host, 'apidocs/') @@ -1111,3 +1176,85 @@ def test_swaggerui_sdap(host): raise ValueError("Could not verify documentation yaml file, assumed value also failed") [email protected] +def test_version(host, start): + url = urljoin(host, 'version') + + response = requests.get(url) + + assert response.status_code == 200 + assert re.match(r'^\d+\.\d+\.\d+(-.+)?$', response.text) + + [email protected] +def test_capabilities(host, start): + url = urljoin(host, 'capabilities') + + response = requests.get(url) + + assert response.status_code == 200 + + capabilities = response.json() + + try_save('test_capabilities', start, capabilities) + + assert len(capabilities) > 0 + + for capability in capabilities: + assert all([k in capability for k in ['name', 'path', 'description', 'parameters']]) + assert all([isinstance(k, str) for k in ['name', 'path', 'description']]) + + assert isinstance(capability['parameters'], (dict, list)) + + for param in capability['parameters']: + if isinstance(capability['parameters'], dict): + param = capability['parameters'][param] + + assert isinstance(param, dict) + assert all([k in param and isinstance(param[k], str) for k in ['name', 'type', 'description']]) + + [email protected] +def test_endpoints(host, start): + url = urljoin(host, 'capabilities') + + response = requests.get(url) + + if response.status_code != 200: + skip('Could not get endpoints list. Expected if test_capabilities has failed') + + capabilities = response.json() + + endpoints = [c['path'] for c in capabilities] + + non_existent_endpoints = [] + + for endpoint in endpoints: + status = requests.head(urljoin(host, endpoint)).status_code + + if status == 404: + # Strip special characters because some endpoints have wildcards/regex characters + # This may not work forever though + stripped_endpoint = re.sub(r'[^a-zA-Z0-9/_-]', '', endpoint) + + status = requests.head(urljoin(host, stripped_endpoint)).status_code + + if status == 404: + non_existent_endpoints.append(([endpoint, stripped_endpoint], status)) + + assert len(non_existent_endpoints) == 0, non_existent_endpoints + + [email protected] +def test_heartbeat(host, start): + url = urljoin(host, 'heartbeat') + + response = requests.get(url) + + assert response.status_code == 200 + heartbeat = response.json() + + assert isinstance(heartbeat, dict) + assert all(heartbeat.values()) + +
