This is an automated email from the ASF dual-hosted git repository. eamonford pushed a commit to branch bug_fixes in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit d562fa80ca66673e58eb86d9fb7429d08337ab0a Author: Eamon Ford <[email protected]> AuthorDate: Mon Aug 10 12:02:32 2020 -0700 revert doms --- .gitignore | 1 + analysis/setup.py | 3 +- analysis/webservice/algorithms_spark/__init__.py | 6 + analysis/webservice/config/web.ini | 2 +- data-access/nexustiles/dao/CassandraProxy.py | 3 - data-access/tests/config/datastores.ini | 9 ++ tools/doms/README.md | 66 +++++++++++ tools/doms/doms_reader.py | 144 +++++++++++++++++++++++ 8 files changed, 229 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 4e4cf6e..3e29626 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,6 @@ *.code-workspace *.idea *.DS_Store +analysis/webservice/algorithms/doms/domsconfig.ini data-access/nexustiles/config/datastores.ini venv/ diff --git a/analysis/setup.py b/analysis/setup.py index 9a449ce..62a6891 100644 --- a/analysis/setup.py +++ b/analysis/setup.py @@ -50,7 +50,8 @@ setuptools.setup( # 'webservice.nexus_tornado.request.renderers' #], package_data={ - 'webservice': ['config/web.ini', 'config/algorithms.ini'] + 'webservice': ['config/web.ini', 'config/algorithms.ini'], + 'webservice.algorithms.doms': ['domsconfig.ini.default'] }, data_files=[ ('static', ['static/index.html']) diff --git a/analysis/webservice/algorithms_spark/__init__.py b/analysis/webservice/algorithms_spark/__init__.py index a25c8d5..d6ed83f 100644 --- a/analysis/webservice/algorithms_spark/__init__.py +++ b/analysis/webservice/algorithms_spark/__init__.py @@ -20,6 +20,7 @@ import ClimMapSpark import CorrMapSpark import DailyDifferenceAverageSpark import HofMoellerSpark +import Matchup import MaximaMinimaSpark import NexusCalcSparkHandler import TimeAvgMapSpark @@ -46,6 +47,11 @@ if module_exists("pyspark"): pass try: + import Matchup + except ImportError: + pass + + try: import TimeAvgMapSpark except ImportError: pass diff --git a/analysis/webservice/config/web.ini b/analysis/webservice/config/web.ini index a1ecb2c..2644ade 100644 --- a/analysis/webservice/config/web.ini +++ b/analysis/webservice/config/web.ini @@ -14,4 +14,4 @@ static_enabled=true static_dir=static [modules] -module_dirs=webservice.algorithms,webservice.algorithms_spark \ No newline at end of file +module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms \ No newline at end of file diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py index 54a849b..a8a4e6e 100644 --- a/data-access/nexustiles/dao/CassandraProxy.py +++ b/data-access/nexustiles/dao/CassandraProxy.py @@ -161,9 +161,6 @@ class CassandraProxy(object): self.__cass_protocol_version = config.getint("cassandra", "protocol_version") self.__cass_dc_policy = config.get("cassandra", "dc_policy") - logger.info("Setting cassandra host to " + self.__cass_url) - logger.info("Setting cassandra username to " + self.__cass_username) - try: self.__cass_port = config.getint("cassandra", "port") except NoOptionError: diff --git a/data-access/tests/config/datastores.ini b/data-access/tests/config/datastores.ini new file mode 100644 index 0000000..194760c --- /dev/null +++ b/data-access/tests/config/datastores.ini @@ -0,0 +1,9 @@ +[cassandra] +host=127.0.0.1 +keyspace=nexustiles +local_datacenter=datacenter1 +protocol_version=3 + +[solr] +host=localhost:8983 +core=nexustiles \ No newline at end of file diff --git a/tools/doms/README.md b/tools/doms/README.md new file mode 100644 index 0000000..c49fa4a --- /dev/null +++ b/tools/doms/README.md @@ -0,0 +1,66 @@ +# doms_reader.py +The functions in doms_reader.py read a DOMS netCDF file into memory, assemble a list of matches of satellite and in situ data, and optionally output the matches to a CSV file. Each matched pair contains one satellite data record and one in situ data record. + +The DOMS netCDF files hold satellite data and in situ data in different groups (`SatelliteData` and `InsituData`). The `matchIDs` netCDF variable contains pairs of IDs (matches) which reference a satellite data record and an in situ data record in their respective groups. These records have a many-to-many relationship; one satellite record may match to many in situ records, and one in situ record may match to many satellite records. The `assemble_matches` function assembles the individua [...] + +## Requirements +This tool was developed and tested with Python 2.7.5 and 3.7.0a0. +Imported packages: +* argparse +* netcdf4 +* sys +* datetime +* csv +* collections +* logging + + +## Functions +### Function: `assemble_matches(filename)` +Read a DOMS netCDF file into memory and return a list of matches from the file. + +#### Parameters +- `filename` (str): the DOMS netCDF file name. + +#### Returns +- `matches` (list): List of matches. + +Each list element in `matches` is a dictionary organized as follows: + For match `m`, netCDF group `GROUP` ('SatelliteData' or 'InsituData'), and netCDF group variable `VARIABLE`: + +`matches[m][GROUP]['matchID']`: netCDF `MatchedRecords` dimension ID for the match +`matches[m][GROUP]['GROUPID']`: GROUP netCDF `dim` dimension ID for the record +`matches[m][GROUP][VARIABLE]`: variable value + +For example, to access the timestamps of the satellite data and the in situ data of the first match in the list, along with the `MatchedRecords` dimension ID and the groups' `dim` dimension ID: +```python +matches[0]['SatelliteData']['time'] +matches[0]['InsituData']['time'] +matches[0]['SatelliteData']['matchID'] +matches[0]['SatelliteData']['SatelliteDataID'] +matches[0]['InsituData']['InsituDataID'] +``` + + +### Function: `matches_to_csv(matches, csvfile)` +Write the DOMS matches to a CSV file. Include a header of column names which are based on the group and variable names from the netCDF file. + +#### Parameters: +- `matches` (list): the list of dictionaries containing the DOMS matches as returned from the `assemble_matches` function. +- `csvfile` (str): the name of the CSV output file. + +## Usage +For example, to read some DOMS netCDF file called `doms_file.nc`: +### Command line +The main function for `doms_reader.py` takes one `filename` parameter (`doms_file.nc` argument in this example) for the DOMS netCDF file to read, calls the `assemble_matches` function, then calls the `matches_to_csv` function to write the matches to a CSV file `doms_matches.csv`. +``` +python doms_reader.py doms_file.nc +``` +``` +python3 doms_reader.py doms_file.nc +``` +### Importing `assemble_matches` +```python +from doms_reader import assemble_matches +matches = assemble_matches('doms_file.nc') +``` diff --git a/tools/doms/doms_reader.py b/tools/doms/doms_reader.py new file mode 100644 index 0000000..c8229c4 --- /dev/null +++ b/tools/doms/doms_reader.py @@ -0,0 +1,144 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +from netCDF4 import Dataset, num2date +import sys +import datetime +import csv +from collections import OrderedDict +import logging + +LOGGER = logging.getLogger("doms_reader") + +def assemble_matches(filename): + """ + Read a DOMS netCDF file and return a list of matches. + + Parameters + ---------- + filename : str + The DOMS netCDF file name. + + Returns + ------- + matches : list + List of matches. Each list element is a dictionary. + For match m, netCDF group GROUP (SatelliteData or InsituData), and + group variable VARIABLE: + matches[m][GROUP]['matchID']: MatchedRecords dimension ID for the match + matches[m][GROUP]['GROUPID']: GROUP dim dimension ID for the record + matches[m][GROUP][VARIABLE]: variable value + """ + + try: + # Open the netCDF file + with Dataset(filename, 'r') as doms_nc: + # Check that the number of groups is consistent w/ the MatchedGroups + # dimension + assert len(doms_nc.groups) == doms_nc.dimensions['MatchedGroups'].size,\ + ("Number of groups isn't the same as MatchedGroups dimension.") + + matches = [] + matched_records = doms_nc.dimensions['MatchedRecords'].size + + # Loop through the match IDs to assemble matches + for match in range(0, matched_records): + match_dict = OrderedDict() + # Grab the data from each platform (group) in the match + for group_num, group in enumerate(doms_nc.groups): + match_dict[group] = OrderedDict() + match_dict[group]['matchID'] = match + ID = doms_nc.variables['matchIDs'][match][group_num] + match_dict[group][group + 'ID'] = ID + for var in doms_nc.groups[group].variables.keys(): + match_dict[group][var] = doms_nc.groups[group][var][ID] + + # Create a UTC datetime field from timestamp + dt = num2date(match_dict[group]['time'], + doms_nc.groups[group]['time'].units) + match_dict[group]['datetime'] = dt + LOGGER.info(match_dict) + matches.append(match_dict) + + return matches + except (OSError, IOError) as err: + LOGGER.exception("Error reading netCDF file " + filename) + raise err + +def matches_to_csv(matches, csvfile): + """ + Write the DOMS matches to a CSV file. Include a header of column names + which are based on the group and variable names from the netCDF file. + + Parameters + ---------- + matches : list + The list of dictionaries containing the DOMS matches as returned from + assemble_matches. + csvfile : str + The name of the CSV output file. + """ + # Create a header for the CSV. Column names are GROUP_VARIABLE or + # GROUP_GROUPID. + header = [] + for key, value in matches[0].items(): + for otherkey in value.keys(): + header.append(key + "_" + otherkey) + + try: + # Write the CSV file + with open(csvfile, 'w') as output_file: + csv_writer = csv.writer(output_file) + csv_writer.writerow(header) + for match in matches: + row = [] + for group, data in match.items(): + for value in data.values(): + row.append(value) + csv_writer.writerow(row) + except (OSError, IOError) as err: + LOGGER.exception("Error writing CSV file " + csvfile) + raise err + +if __name__ == '__main__': + """ + Execution: + python doms_reader.py filename + OR + python3 doms_reader.py filename + """ + logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s', + level=logging.INFO, + datefmt='%Y-%m-%d %H:%M:%S') + + p = argparse.ArgumentParser() + p.add_argument('filename', help='DOMS netCDF file to read') + args = p.parse_args() + + doms_matches = assemble_matches(args.filename) + + matches_to_csv(doms_matches, 'doms_matches.csv') + + + + + + + + + + + \ No newline at end of file
