http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/data-access/nexustiles/model/__init__.py ---------------------------------------------------------------------- diff --git a/data-access/nexustiles/model/__init__.py b/data-access/nexustiles/model/__init__.py new file mode 100644 index 0000000..e69de29
# --- data-access/nexustiles/model/nexusmodel.py (reconstructed from the flattened diff hunk) ---
"""
Copyright (c) 2016 Jet Propulsion Laboratory,
California Institute of Technology. All rights reserved
"""
from collections import namedtuple

import numpy as np

# One grid-cell observation: geographic position, time, the (time, lat, lon)
# index into Tile.data, and the measured value.
NexusPoint = namedtuple('NexusPoint', 'latitude longitude depth time index data_val')
# Geographic bounding box, degrees.
BBox = namedtuple('BBox', 'min_lat max_lat min_lon max_lon')
# Summary statistics over one tile's data values.
TileStats = namedtuple('TileStats', 'min max mean count')


class Tile(object):
    """In-memory representation of one NEXUS tile: metadata plus data arrays."""

    def __init__(self):
        self.tile_id = None
        self.dataset_id = None
        self.section_spec = None
        self.dataset = None
        self.granule = None

        self.bbox = None

        self.min_time = None
        self.max_time = None

        self.tile_stats = None

        self.latitudes = None  # This should be a 1-d ndarray
        self.longitudes = None  # This should be a 1-d ndarray
        self.times = None  # This should be a 1-d ndarray
        self.data = None  # This should be an ndarray with shape len(times) x len(latitudes) x len(longitudes)

        self.meta_data = None  # This should be a dict of the form { 'meta_data_name' : [[[ndarray]]] }. Each ndarray should be the same shape as data.

    def __str__(self):
        return str(self.get_summary())

    def get_summary(self):
        """Return a dict of this tile's attributes with each array replaced by its shape."""
        summary = dict(self.__dict__)

        # EAFP: any array attribute may still be None, in which case .shape
        # raises AttributeError and the summary reports the string 'None'.
        try:
            summary['latitudes'] = self.latitudes.shape
        except AttributeError:
            summary['latitudes'] = 'None'

        try:
            summary['longitudes'] = self.longitudes.shape
        except AttributeError:
            summary['longitudes'] = 'None'

        try:
            summary['times'] = self.times.shape
        except AttributeError:
            summary['times'] = 'None'

        try:
            summary['data'] = self.data.shape
        except AttributeError:
            summary['data'] = 'None'

        try:
            # FIX: was .iteritems() (Python 2 only). .items() behaves the same
            # under Python 2 and also works under Python 3.
            summary['meta_data'] = {meta_name: meta_array.shape
                                    for meta_name, meta_array in self.meta_data.items()}
        except AttributeError:
            summary['meta_data'] = 'None'

        return summary

    def nexus_point_generator(self, include_nan=False):
        """Yield one NexusPoint per grid cell.

        With include_nan=False, cells are selected via np.ma.nonzero, which
        skips masked cells.
        NOTE(review): np.ma.nonzero also skips cells whose value is exactly 0,
        unlike get_indices() which filters on the mask alone — confirm intended.
        """
        if include_nan:
            for index in np.ndindex(self.data.shape):
                time = self.times[index[0]]
                lat = self.latitudes[index[1]]
                lon = self.longitudes[index[2]]
                data_val = self.data[index]
                point = NexusPoint(lat, lon, None, time, index, data_val)
                yield point
        else:
            for index in np.transpose(np.ma.nonzero(self.data)):
                index = tuple(index)
                time = self.times[index[0]]
                lat = self.latitudes[index[1]]
                lon = self.longitudes[index[2]]
                data_val = self.data[index]
                point = NexusPoint(lat, lon, None, time, index, data_val)
                yield point

    def get_indices(self, include_nan=False):
        """Return a list of [time, lat, lon] indices of unmasked (or, with
        include_nan=True, all) cells of self.data."""
        if include_nan:
            return list(np.ndindex(self.data.shape))
        else:
            return np.transpose(np.where(np.ma.getmaskarray(self.data) == False)).tolist()

    def contains_point(self, lat, lon):
        """True when (lat, lon) lies within this tile's unmasked coordinate extent."""
        return contains_point(self.latitudes, self.longitudes, lat, lon)

    def update_stats(self):
        """Recompute self.tile_stats (min, max, cos(latitude)-weighted mean,
        and count of non-NaN cells) from self.data."""
        t_min = np.nanmin(self.data).item()
        t_max = np.nanmax(self.data).item()
        # Weight each cell by cos(latitude) so higher-latitude (smaller) cells
        # contribute less to the mean.
        # NOTE(review): the weight vector has len(latitudes) * len(longitudes)
        # entries, which only matches the flattened data when len(times) == 1
        # — confirm tiles always carry a single time slice here.
        t_mean = np.ma.average(np.ma.masked_invalid(self.data).flatten(),
                               weights=np.cos(np.radians(np.repeat(self.latitudes, len(self.longitudes)))))
        t_count = self.data.size - np.count_nonzero(np.isnan(self.data))
        self.tile_stats = TileStats(t_min, t_max, t_mean, t_count)


def contains_point(latitudes, longitudes, lat, lon):
    """True when (lat, lon) falls inside the unmasked extent of the coordinate
    arrays, with np.isclose tolerance at the boundaries."""
    minx, miny, maxx, maxy = np.ma.min(longitudes), np.ma.min(latitudes), np.ma.max(
        longitudes), np.ma.max(latitudes)
    return (
               (miny < lat or np.isclose(miny, lat)) and
               (lat < maxy or np.isclose(lat, maxy))
           ) and (
               (minx < lon or np.isclose(minx, lon)) and
               (lon < maxx or np.isclose(lon, maxx))
           )


def merge_tiles(tile_list):
    """Merge non-overlapping, same-time tiles into one coordinate/data grid.

    :param tile_list: tiles sharing identical times arrays
    :return: (times, latitudes, longitudes, data) with latitudes/longitudes
        sorted ascending and data shaped (1, len(lats), len(lons)); cells not
        covered by any input tile are masked
    :raises AssertionError: when the tiles' times differ
    :raises Exception: when two tiles overlap in both latitude and longitude
    """
    a = np.array([tile.times for tile in tile_list])
    assert np.ma.max(a) == np.ma.min(a)

    merged_times = tile_list[0].times
    merged_lats = np.ndarray((0,), dtype=np.float32)
    merged_lons = np.ndarray((0,), dtype=np.float32)
    merged_data = np.ndarray((0, 0), dtype=np.float32)

    for tile in tile_list:
        # FIX: np.ma.in1d is deprecated in modern NumPy; np.ma.isin is the
        # documented replacement and is identical for these 1-d inputs.
        lats_known = np.ma.isin(tile.latitudes, merged_lats).all()
        lons_known = np.ma.isin(tile.longitudes, merged_lons).all()
        if lats_known and not lons_known:
            # Same latitude rows, new longitude columns: extend to the right.
            merged_lons = np.ma.concatenate([merged_lons, tile.longitudes])
            merged_data = np.ma.hstack((merged_data, np.ma.squeeze(tile.data)))
        elif not lats_known and lons_known:
            # New latitude rows, same longitude columns: extend downward.
            merged_lats = np.ma.concatenate([merged_lats, tile.latitudes])
            merged_data = np.ma.vstack((merged_data, np.ma.squeeze(tile.data)))
        elif not lats_known and not lons_known:
            # Disjoint in both axes: place on the block diagonal, masking the
            # off-diagonal (uncovered) cells.
            merged_lats = np.ma.concatenate([merged_lats, tile.latitudes])
            merged_lons = np.ma.concatenate([merged_lons, tile.longitudes])
            merged_data = block_diag(*[merged_data, np.ma.squeeze(tile.data)])
        else:
            raise Exception("Can't handle overlapping tiles")

    # Sort rows/columns into ascending coordinate order.
    merged_data = merged_data[np.ma.argsort(merged_lats), :]
    merged_data = merged_data[:, np.ma.argsort(merged_lons)]
    merged_lats = merged_lats[np.ma.argsort(merged_lats),]
    merged_lons = merged_lons[np.ma.argsort(merged_lons),]

    # Restore the leading (single) time dimension.
    merged_data = merged_data[np.newaxis, :]

    return merged_times, merged_lats, merged_lons, merged_data


def block_diag(*arrs):
    """Create a block diagonal masked matrix from the provided arrays.

    Given inputs `A`, `B` and `C`, the output has these arrays arranged on
    the diagonal::

        [[A, 0, 0],
         [0, B, 0],
         [0, 0, C]]

    except that off-diagonal cells are *masked* rather than zero (this is a
    masked-array variant of scipy.linalg.block_diag).

    Parameters
    ----------
    A, B, C, ... : array-like, up to 2D
        Input arrays. A 1D array or array-like sequence with length n is
        treated as a 2D array with shape (1, n).

    Returns
    -------
    D : masked ndarray
        Array with `A`, `B`, `C`, ... on the diagonal; same dtype as `A`.

    Raises
    ------
    ValueError
        If any input has more than 2 dimensions.
    """
    if arrs == ():
        arrs = ([],)
    arrs = [np.atleast_2d(a) for a in arrs]

    bad_args = [k for k in range(len(arrs)) if arrs[k].ndim > 2]
    if bad_args:
        raise ValueError("arguments in the following positions have dimension "
                         "greater than 2: %s" % bad_args)

    shapes = np.array([a.shape for a in arrs])
    # Everything starts masked; each block assignment unmasks its cells.
    out = np.ma.masked_all(np.sum(shapes, axis=0), dtype=arrs[0].dtype)

    r, c = 0, 0
    for i, (rr, cc) in enumerate(shapes):
        out[r:r + rr, c:c + cc] = arrs[i]
        r += rr
        c += cc
    return out


def find_nearest(array, value):
    """Return the element of `array` closest to `value`."""
    idx = (np.abs(array - value)).argmin()
    return array[idx]


def get_approximate_value_for_lat_lon(tile_list, lat, lon):
    """
    Pull the value out of the tiles in tile_list that is closest to the given
    lat, lon point.

    :returns float value closest to lat lon point or float('NaN') if the point
        is masked or not contained in any tile
    """
    try:
        times, lats, longs, data = merge_tiles(tile_list)
        if not contains_point(lats, longs, lat, lon):
            # Lat, Lon is out of bounds for these tiles
            return float('NaN')
    except AssertionError:
        # Tiles are not all at the same time
        return float('NaN')

    # (unused nearest_lat/nearest_long locals from the original removed)
    data_val = data[0][(np.abs(lats - lat)).argmin()][(np.abs(longs - lon)).argmin()]

    return data_val.item() if (data_val is not np.ma.masked) and data_val.size == 1 else float('Nan')
# --- data-access/nexustiles/nexustiles.py follows in the next hunk of this digest ---
All rights reserved +""" +import ConfigParser +from datetime import datetime +from functools import wraps + +import numpy as np +import numpy.ma as ma +import pkg_resources +import sys +from pytz import timezone +from shapely.geometry import MultiPolygon, box + +from dao.CassandraProxy import CassandraProxy +from dao.SolrProxy import SolrProxy +from model.nexusmodel import Tile, BBox, TileStats + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) + + +def tile_data(default_fetch=True): + def tile_data_decorator(func): + @wraps(func) + def fetch_data_for_func(*args, **kwargs): + if ('fetch_data' not in kwargs and not default_fetch) or ( + 'fetch_data' in kwargs and not kwargs['fetch_data']): + solr_docs = func(*args, **kwargs) + tiles = args[0]._solr_docs_to_tiles(*solr_docs) + return tiles + else: + solr_docs = func(*args, **kwargs) + tiles = args[0]._solr_docs_to_tiles(*solr_docs) + if len(tiles) > 0: + args[0].fetch_data_for_tiles(*tiles) + return tiles + + return fetch_data_for_func + + return tile_data_decorator + + +class NexusTileServiceException(Exception): + pass + + +class NexusTileService(object): + def __init__(self, skipCassandra=False, skipSolr=False, config=None): + if config is None: + self._config = ConfigParser.RawConfigParser() + self._config.readfp(pkg_resources.resource_stream(__name__, "config/datastores.ini"), + filename='datastores.ini') + else: + self._config = config + + if not skipCassandra: + self._cass = CassandraProxy(self._config) + + if not skipSolr: + self._solr = SolrProxy(self._config) + + def get_dataseries_list(self, simple=False): + if simple: + return self._solr.get_data_series_list_simple() + else: + return self._solr.get_data_series_list() + + @tile_data() + def find_tile_by_id(self, tile_id, **kwargs): + return self._solr.find_tile_by_id(tile_id) + + @tile_data() + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + return self._solr.find_tiles_by_id(tile_ids, ds=ds, **kwargs) + + def find_days_in_range_asc(self, 
min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, **kwargs): + return self._solr.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + **kwargs) + + @tile_data() + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. + + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + try: + tile = self._solr.find_tile_by_polygon_and_most_recent_day_of_year(bounding_polygon, ds, day_of_year) + except IndexError: + raise NexusTileServiceException("No tile found."), None, sys.exc_info()[2] + + return tile + + @tile_data() + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + return self._solr.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, rows=5000, + **kwargs) + + 
@tile_data() + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + return self._solr.find_all_tiles_in_polygon_at_time(bounding_polygon, dataset, time, rows=5000, + **kwargs) + + @tile_data() + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall in the given box in the Solr index + return self._solr.find_all_tiles_in_box_sorttimeasc(min_lat, max_lat, min_lon, max_lon, ds, start_time, + end_time, **kwargs) + + @tile_data() + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall within the polygon in the Solr index + if 'sort' in kwargs.keys(): + tiles = self._solr.find_all_tiles_in_polygon(bounding_polygon, ds, start_time, end_time, **kwargs) + else: + tiles = self._solr.find_all_tiles_in_polygon_sorttimeasc(bounding_polygon, ds, start_time, end_time, + **kwargs) + return tiles + + @tile_data() + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. 
+ + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + tiles = self._solr.find_tiles_by_exact_bounds(bounds[0], bounds[1], bounds[2], bounds[3], ds, start_time, + end_time) + return tiles + + @tile_data() + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + return self._solr.find_all_boundary_tiles_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, rows=5000, + **kwargs) + + def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, + **kwargs): + tiles = self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) + tiles = self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + + return tiles + + def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time=-1, **kwargs): + tiles = self.find_tiles_in_polygon(polygon, ds, start_time, end_time, **kwargs) + tiles = self.mask_tiles_to_polygon(polygon, tiles) + + return tiles + + def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + + return tiles + + def get_tiles_bounded_by_polygon_at_time(self, polygon, dataset, time, **kwargs): + tiles = self.find_all_tiles_in_polygon_at_time(polygon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_polygon(polygon, tiles) + + return tiles + + def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self.find_all_boundary_tiles_at_time(min_lat, max_lat, 
min_lon, max_lon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + + return tiles + + def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self._solr.find_all_tiles_within_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, + **kwargs) + + return tiles + + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + tiles = self.find_tiles_by_id(tile_ids, fl=['tile_min_lat', 'tile_max_lat', 'tile_min_lon', 'tile_max_lon'], + fetch_data=False, rows=len(tile_ids)) + polys = [] + for tile in tiles: + polys.append(box(tile.bbox.min_lon, tile.bbox.min_lat, tile.bbox.max_lon, tile.bbox.max_lat)) + return box(*MultiPolygon(polys).bounds) + + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + min_time = self._solr.find_min_date_from_tiles(tile_ids, ds=ds) + return long((min_time - EPOCH).total_seconds()) + + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + max_time = self._solr.find_max_date_from_tiles(tile_ids, ds=ds) + return long((max_time - EPOCH).total_seconds()) + + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. 
+ :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + bounds = self._solr.find_distinct_bounding_boxes_in_polygon(bounding_polygon, ds, start_time, end_time) + return [box(*b) for b in bounds] + + def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): + + for tile in tiles: + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_polygon(self, bounding_polygon, tiles): + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + for tile in tiles: + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def fetch_data_for_tiles(self, *tiles): + + nexus_tile_ids = set([tile.tile_id for tile in tiles]) + matched_tile_data = 
self._cass.fetch_nexus_tiles(*nexus_tile_ids) + tile_data_by_id = {str(a_tile_data.tile_id): a_tile_data for a_tile_data in matched_tile_data} + + missing_data = nexus_tile_ids.difference(tile_data_by_id.keys()) + if len(missing_data) > 0: + raise StandardError("Missing data for tile_id(s) %s." % missing_data) + + for a_tile in tiles: + lats, lons, times, data, meta = tile_data_by_id[a_tile.tile_id].get_lat_lon_time_data_meta() + + a_tile.latitudes = lats + a_tile.longitudes = lons + a_tile.times = times + a_tile.data = data + a_tile.meta_data = meta + + del (tile_data_by_id[a_tile.tile_id]) + + return tiles + + def _solr_docs_to_tiles(self, *solr_docs): + + tiles = [] + for solr_doc in solr_docs: + tile = Tile() + try: + tile.tile_id = solr_doc['id'] + except KeyError: + pass + + try: + tile.bbox = BBox( + solr_doc['tile_min_lat'], solr_doc['tile_max_lat'], + solr_doc['tile_min_lon'], solr_doc['tile_max_lon']) + except KeyError: + pass + + try: + tile.dataset = solr_doc['dataset_s'] + except KeyError: + pass + + try: + tile.dataset_id = solr_doc['dataset_id_s'] + except KeyError: + pass + + try: + tile.granule = solr_doc['granule_s'] + except KeyError: + pass + + try: + tile.min_time = solr_doc['tile_min_time_dt'] + except KeyError: + pass + + try: + tile.max_time = solr_doc['tile_max_time_dt'] + except KeyError: + pass + + try: + tile.section_spec = solr_doc['sectionSpec_s'] + except KeyError: + pass + + try: + tile.tile_stats = TileStats( + solr_doc['tile_min_val_d'], solr_doc['tile_max_val_d'], + solr_doc['tile_avg_val_d'], solr_doc['tile_count_i'] + ) + except KeyError: + pass + + tiles.append(tile) + + return tiles + + def pingSolr(self): + status = self._solr.ping() + if status and status["status"] == "OK": + return True + else: + return False http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/data-access/requirements.txt ---------------------------------------------------------------------- diff --git a/data-access/requirements.txt 
# --- data-access/requirements.txt (content preserved verbatim, commented for this digest) ---
# appnope==0.1.0
# awscli==1.11.141
# backports.shutil-get-terminal-size==1.0.0
# botocore==1.6.8
# cassandra-driver==3.5.0
# colorama==0.3.7
# ConfigArgParse==0.11.0
# contextlib2==0.5.4
# Cython==0.24
# decorator==4.0.11
# docutils==0.14
# enum34==1.1.6
# futures==3.1.1
# ipython==5.3.0
# ipython-genutils==0.2.0
# jmespath==0.9.3
# nexusproto==0.3
# numpy==1.11.1
# pathlib2==2.2.1
# pexpect==4.2.1
# pickleshare==0.7.4
# prompt-toolkit==1.0.14
# protobuf==2.6.1
# ptyprocess==0.5.1
# pyasn1==0.3.3
# Pygments==2.2.0
# python-dateutil==2.6.1
# pytz==2016.6.1
# PyYAML==3.12
# requests==2.13.0
# rsa==3.4.2
# s3transfer==0.1.10
# scandir==1.5
# semver==2.7.6
# Shapely==1.5.17
# simplegeneric==0.8.1
# six==1.10.0
# solrcloudpy==2.4.1
# solrpy==0.9.7
# traitlets==4.3.2
# wcwidth==0.1.7

# --- data-access/setup.py (reconstructed from the flattened diff hunk) ---
"""
Copyright (c) 2016 Jet Propulsion Laboratory,
California Institute of Technology. All rights reserved
"""
import setuptools
from Cython.Build import cythonize

__version__ = '0.32'

setuptools.setup(
    name="nexus-data-access",
    version=__version__,
    url="https://github.jpl.nasa.gov/thuang/nexus",

    author="Team Nexus",

    description="NEXUS API.",
    long_description=open('README.md').read(),

    packages=['nexustiles', 'nexustiles.model', 'nexustiles.dao'],
    # Ship the default datastore configuration inside the package.
    package_data={'nexustiles': ['config/datastores.ini']},
    platforms='any',
    setup_requires=['cython'],
    install_requires=[
        'cassandra-driver==3.5.0',
        'solrpy==0.9.7',
        'requests',
        'nexusproto',
        'shapely'
    ],

    classifiers=[
        'Development Status :: 1 - Pre-Alpha',
        'Intended Audience :: Developers',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2.7',
    ],

    # Compile any Cython sources found in the package tree.
    ext_modules=cythonize(["**/*.pyx"]),
    zip_safe=False
)
# --- data-access/tests/__init__.py follows in the next hunk of this digest ---
# --- data-access/tests/__init__.py: package marker whose docstring reads ---
# "Copyright (c) 2016 Jet Propulsion Laboratory,
#  California Institute of Technology. All rights reserved"

# --- data-access/tests/config/datastores.ini (content preserved verbatim, commented for this digest) ---
# [cassandra]
# host=127.0.0.1
# keyspace=nexustiles
# local_datacenter=datacenter1
# protocol_version=3
#
# [solr]
# host=localhost:8983
# core=nexustiles

# --- data-access/tests/nexusmodel_test.py (reconstructed from the flattened diff hunk) ---
"""
Copyright (c) 2016 Jet Propulsion Laboratory,
California Institute of Technology. All rights reserved
"""

import unittest
import numpy as np
from nexustiles.model.nexusmodel import get_approximate_value_for_lat_lon, Tile, BBox


def _make_tile(latitudes, longitudes, data, bbox=None):
    """Build a single-time-slice Tile from the given coordinate/data arrays.

    (The original repeated this five-line fixture in every test; 0L literals
    replaced with 0 — numpy produces the identical integer array.)
    """
    tile = Tile()
    tile.bbox = bbox
    tile.latitudes = np.ma.array(latitudes)
    tile.longitudes = np.ma.array(longitudes)
    tile.times = np.ma.array([0])
    tile.data = np.ma.array(data)
    return tile


def _standard_tile(latitudes=(-1.0, -0.5, 0, .5, 1.0)):
    """The 5x5 tile used throughout: longitudes -2..2, data values 0..24
    row-major (rows follow latitudes, columns follow longitudes)::

        #         -2   -1    0    1    2
        # -1.0 [[[ 0.   1.   2.   3.   4.]
        # -0.5   [ 5.   6.   7.   8.   9.]
        #  0.    [10.  11.  12.  13.  14.]
        #  0.5   [15.  16.  17.  18.  19.]
        #  1.0   [20.  21.  22.  23.  24.]]]
    """
    return _make_tile(list(latitudes), [-2.0, -1.0, 0, 1.0, 2.0],
                      np.ma.arange(25.0).reshape((1, 5, 5)),
                      bbox=BBox(-1.0, 1.0, -2.0, 2.0))


class TestApproximateValueMethod(unittest.TestCase):
    def test_lat_exact_lon_exact(self):
        tile = _standard_tile()
        self.assertAlmostEqual(22.0, get_approximate_value_for_lat_lon([tile], 1.0, 0))

    def test_lat_lon_exact_min(self):
        tile = _standard_tile()
        self.assertAlmostEqual(0, get_approximate_value_for_lat_lon([tile], -1.0, -2.0))

    def test_lat_approx_lon_exact(self):
        tile = _standard_tile()
        self.assertAlmostEqual(5.0, get_approximate_value_for_lat_lon([tile], -0.4, -2.0))

    def test_lat_approx_lon_approx(self):
        tile = _standard_tile()
        self.assertAlmostEqual(7.0, get_approximate_value_for_lat_lon([tile], -0.4, 0.01))

    def test_lat_exact_lon_approx(self):
        tile = _standard_tile()
        self.assertAlmostEqual(18.0, get_approximate_value_for_lat_lon([tile], 0.5, 1.01))

    def test_lat_greater_than_bounds(self):
        tile = _standard_tile()
        self.assertTrue(np.isnan(get_approximate_value_for_lat_lon([tile], 2.0, 0)))

    def test_lat_less_than_bounds(self):
        tile = _standard_tile()
        self.assertTrue(np.isnan(get_approximate_value_for_lat_lon([tile], -2.0, 0)))

    def test_lon_greater_than_bounds(self):
        tile = _standard_tile()
        self.assertTrue(np.isnan(get_approximate_value_for_lat_lon([tile], 0, 3)))

    def test_lon_less_than_bounds(self):
        tile = _standard_tile()
        self.assertTrue(np.isnan(get_approximate_value_for_lat_lon([tile], 0, -3)))

    def test_repeated_lats(self):
        # Duplicate -0.5 row: nearest-lat lookup must still resolve.
        tile = _standard_tile(latitudes=(-1.0, -0.5, -0.5, .5, 1.0))
        self.assertAlmostEqual(6.0, get_approximate_value_for_lat_lon([tile], -0.4, -1))

    def test_multiple_tiles(self):
        # Two tiles disjoint in both latitude and longitude.
        tile1 = _make_tile([0.0, 1.0, 2.0, 3.0], [0.0, -1.0, -2.0],
                           np.ma.arange(12).reshape((1, 4, 3)))
        tile2 = _make_tile([4.0, 5.0, 6.0, 7.0], [-3.0, -4.0, -5.0],
                           np.ma.arange(12, 24).reshape((1, 4, 3)))
        self.assertAlmostEqual(1, get_approximate_value_for_lat_lon([tile1, tile2], 0.4, -1))

    def test_multiple_tiles_same_long(self):
        # Two tiles sharing longitudes but with disjoint latitudes.
        tile1 = _make_tile([0.0, 1.0, 2.0, 3.0], [0.0, -1.0, -2.0],
                           np.ma.arange(12).reshape((1, 4, 3)))
        tile2 = _make_tile([4.0, 5.0, 6.0, 7.0], [0.0, -1.0, -2.0],
                           np.ma.arange(12, 24).reshape((1, 4, 3)))
        self.assertAlmostEqual(1, get_approximate_value_for_lat_lon([tile1, tile2], 0.4, -1))


class TestTileContainsMethod(unittest.TestCase):
    def test_masked_tile(self):
        tile = _make_tile(np.ma.arange(30.5, 38.5, 1.0),
                          np.ma.arange(-51.5, -35.5, 1.0),
                          np.ma.arange(128.0).reshape((1, 8, 16)),
                          bbox=BBox(30.5, 37.5, -51.5, -36.5))

        # Mask latitudes outside [35, 45] and longitudes outside [-50, -40];
        # the tile's unmasked extent shrinks accordingly.
        tile.latitudes = np.ma.masked_outside(tile.latitudes, 35, 45)
        tile.longitudes = np.ma.masked_outside(tile.longitudes, -50, -40)
        # (method continues in the following, still-flattened hunk of this digest)
+ # Tile no longer contains 35, -50 + self.assertFalse(tile.contains_point(35, -50)) + + +class TestTileUpdateStats(unittest.TestCase): + def test_update_tile_stats(self): + tile = Tile() + tile.bbox = BBox(-1.0, 1.0, -2.0, 2.0) + tile.latitudes = np.ma.array([-1.0, -0.5, 0, .5, 1.0]) + tile.longitudes = np.ma.array([-2.0, -1.0, 0, 1.0, 2.0]) + tile.times = np.ma.array([0L]) + tile.data = np.ma.arange(25.0).reshape((1, 5, 5)) + + # -2 -1 0 1 2 + # -1.0 [[[0. 1. 2. 3. 4. ] + # -0.5 [5. 6. 7. 8. 9. ] + # 0. [10. 11. 12. 13. 14.] + # 0.5 [15. 16. 17. 18. 19.] + # 1.0 [20. 21. 22. 23. 24.]]] + + tile.update_stats() + + self.assertAlmostEqual(0.0, tile.tile_stats.min) + self.assertAlmostEqual(24.0, tile.tile_stats.max) + self.assertAlmostEqual(12.0, tile.tile_stats.mean) + self.assertEqual(25, tile.tile_stats.count) + + +class TestMergeTilesMethod(unittest.TestCase): + def test_merge_tiles(self): + tile1 = Tile() + tile1.latitudes = np.ma.array([0.0, 1.0, 2.0, 3.0]) + tile1.longitudes = np.ma.array([0.0, -1.0, -2.0]) + tile1.times = np.ma.array([0L]) + tile1.data = np.ma.arange(12).reshape((1, 4, 3)) + + tile2 = Tile() + tile2.latitudes = np.ma.array([4.0, 5.0, 6.0, 7.0]) + tile2.longitudes = np.ma.array([-3.0, -4.0, -5.0]) + tile2.times = np.ma.array([0L]) + tile2.data = np.ma.arange(12, 24).reshape((1, 4, 3)) + + from nexustiles.model.nexusmodel import merge_tiles + + times, lats, longs, data = merge_tiles([tile1, tile2]) + + self.assertTrue(np.ma.allequal(times, np.array([0L]))) + self.assertTrue(np.ma.allequal(lats, np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]))) + self.assertTrue(np.ma.allequal(longs, np.array([-5.0, -4.0, -3.0, -2.0, -1.0, 0.0]))) + expected = np.ma.array([[[0, 0, 0, 2, 1, 0], + [0, 0, 0, 5, 4, 3], + [0, 0, 0, 8, 7, 6], + [0, 0, 0, 11, 10, 9], + [14, 13, 12, 0, 0, 0], + [17, 16, 15, 0, 0, 0], + [20, 19, 18, 0, 0, 0], + [23, 22, 21, 0, 0, 0]]], mask=np.array([[[True, True, True, False, False, False], + [True, True, True, False, False, False], 
+ [True, True, True, False, False, False], + [True, True, True, False, False, False], + [False, False, False, True, True, True], + [False, False, False, True, True, True], + [False, False, False, True, True, True], + [False, False, False, True, True, True]]])) + self.assertTrue(np.ma.allequal(np.ma.getmaskarray(data), np.ma.getmaskarray(expected))) + self.assertTrue(np.ma.allequal(data, expected)) + + def test_merge_tiles_vertical(self): + tile1 = Tile() + tile1.latitudes = np.ma.array([0.0, 1.0, 2.0, 3.0]) + tile1.longitudes = np.ma.array([0.0, -1.0, -2.0]) + tile1.times = np.ma.array([0L]) + tile1.data = np.ma.arange(12).reshape((1, 4, 3)) + + tile2 = Tile() + tile2.latitudes = np.ma.array([4.0, 5.0, 6.0, 7.0]) + tile2.longitudes = np.ma.array([0.0, -1.0, -2.0]) + tile2.times = np.ma.array([0L]) + tile2.data = np.ma.arange(12, 24).reshape((1, 4, 3)) + + from nexustiles.model.nexusmodel import merge_tiles + + times, lats, longs, data = merge_tiles([tile1, tile2]) + + self.assertTrue(np.ma.allequal(times, np.array([0L]))) + self.assertTrue(np.ma.allequal(lats, np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]))) + self.assertTrue(np.ma.allequal(longs, np.array([-2.0, -1.0, 0.0]))) + expected = np.ma.array([[[2, 1, 0], + [5, 4, 3], + [8, 7, 6], + [11, 10, 9], + [14, 13, 12], + [17, 16, 15], + [20, 19, 18], + [23, 22, 21]]], mask=False) + self.assertTrue(np.ma.allequal(np.ma.getmaskarray(data), np.ma.getmaskarray(expected))) + self.assertTrue(np.ma.allequal(data, expected)) + + def test_merge_tiles_horizontal(self): + tile1 = Tile() + tile1.latitudes = np.ma.array([0.0, 1.0, 2.0, 3.0]) + tile1.longitudes = np.ma.array([0.0, -1.0, -2.0]) + tile1.times = np.ma.array([0L]) + tile1.data = np.ma.arange(12).reshape((1, 4, 3)) + + tile2 = Tile() + tile2.latitudes = np.ma.array([0.0, 1.0, 2.0, 3.0]) + tile2.longitudes = np.ma.array([-3.0, -4.0, -5.0]) + tile2.times = np.ma.array([0L]) + tile2.data = np.ma.arange(12, 24).reshape((1, 4, 3)) + + from nexustiles.model.nexusmodel 
import merge_tiles + + times, lats, longs, data = merge_tiles([tile1, tile2]) + + self.assertTrue(np.ma.allequal(times, np.array([0L]))) + self.assertTrue(np.ma.allequal(lats, np.array([0.0, 1.0, 2.0, 3.0]))) + self.assertTrue(np.ma.allequal(longs, np.array([-5.0, -4.0, -3.0, -2.0, -1.0, 0.0]))) + expected = np.ma.array([[[14, 13, 12, 2, 1, 0], + [17, 16, 15, 5, 4, 3], + [20, 19, 18, 8, 7, 6], + [23, 22, 21, 11, 10, 9]]], mask=False) + self.assertTrue(np.ma.allequal(np.ma.getmaskarray(data), np.ma.getmaskarray(expected))) + self.assertTrue(np.ma.allequal(data, expected)) + + def test_merge_tiles_overlapping(self): + tile1 = Tile() + tile1.latitudes = np.ma.array([0.0, 1.0, 2.0, 3.0]) + tile1.longitudes = np.ma.array([0.0, -1.0, -2.0]) + tile1.times = np.ma.array([0L]) + tile1.data = np.ma.arange(12).reshape((1, 4, 3)) + + tile2 = Tile() + tile2.latitudes = np.ma.array([0.0, 1.0, 2.0, 3.0]) + tile2.longitudes = np.ma.array([0.0, -1.0, -2.0]) + tile2.times = np.ma.array([0L]) + tile2.data = np.ma.arange(12, 24).reshape((1, 4, 3)) + + from nexustiles.model.nexusmodel import merge_tiles + + self.assertRaises(Exception, lambda _: merge_tiles([tile1, tile2])) http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/data-access/tests/nexustiles_test.py ---------------------------------------------------------------------- diff --git a/data-access/tests/nexustiles_test.py b/data-access/tests/nexustiles_test.py new file mode 100644 index 0000000..e676250 --- /dev/null +++ b/data-access/tests/nexustiles_test.py @@ -0,0 +1,92 @@ +""" +Copyright (c) 2016 Jet Propulsion Laboratory, +California Institute of Technology. 
All rights reserved +""" +import ConfigParser +import time +import unittest +from StringIO import StringIO + +from nexustiles.nexustiles import NexusTileService +from shapely.geometry import box + + +class TestService(unittest.TestCase): + def setUp(self): + config = StringIO("""[cassandra] +host=127.0.0.1 +keyspace=nexustiles +local_datacenter=datacenter1 +protocol_version=3 +port=32769 + +[solr] +host=localhost:8986 +core=nexustiles""") + cp = ConfigParser.RawConfigParser() + cp.readfp(config) + + self.tile_service = NexusTileService(config=cp) + + def test_get_distinct_bounding_boxes_in_polygon(self): + boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(box(-180, -90, 180, 90), + "MXLDEPTH_ECCO_version4_release1", + 1, time.time()) + for b in boxes: + print b.bounds + + def test_get_distinct_bounding_boxes_in_polygon_mur(self): + boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(box(-180, -90, 180, 90), + "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1", + 1, time.time()) + for b in boxes: + print b.bounds + + def test_find_tiles_by_exact_bounds(self): + tiles = self.tile_service.find_tiles_by_exact_bounds((175.01, -42.68, 180.0, -40.2), + "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1", + 1, time.time()) + for tile in tiles: + print tile.get_summary() + + def test_sorted_box(self): + + tiles = self.tile_service.get_tiles_bounded_by_box(-42.68, -40.2, 175.01, 180.0, + "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1", + 1, time.time()) + for tile in tiles: + print tile.min_time + + +# from nexustiles.model.nexusmodel import get_approximate_value_for_lat_lon +# import numpy as np +# +# service = NexusTileService() + +# assert service is not None + +# tiles = service.find_tiles_in_box(-90, 90, -180, 180, ds='AVHRR_OI_L4_GHRSST_NCEI') +# +# print '\n'.join([str(tile.data.shape) for tile in tiles]) + +# ASCATB +# tiles = service.find_tile_by_id('43c63dce-1f6e-3c09-a7b2-e0efeb3a72f2') +# MUR +# tiles = 
service.find_tile_by_id('d9b5afe3-bd7f-3824-ad8a-d8d3b364689c') +# SMAP +# tiles = service.find_tile_by_id('7eee40ef-4c6e-32d8-9a67-c83d4183f724') +# tile = tiles[0] +# +# print get_approximate_value_for_lat_lon([tile], np.min(tile.latitudes), np.min(tile.longitudes) + .005) +# print tile.latitudes +# print tile.longitudes +# print tile.data +# tile +# print type(tile.data) +# +# assert len(tiles) == 1 +# +# tile = tiles[0] +# assert tile.meta_data is not None +# +# print tile.get_summary() http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/data-access/tests/sizefromcass.py ---------------------------------------------------------------------- diff --git a/data-access/tests/sizefromcass.py b/data-access/tests/sizefromcass.py new file mode 100644 index 0000000..3671fd7 --- /dev/null +++ b/data-access/tests/sizefromcass.py @@ -0,0 +1,22 @@ +""" +Copyright (c) 2016 Jet Propulsion Laboratory, +California Institute of Technology. All rights reserved +""" + +import pyximport +pyximport.install() + +import ConfigParser + +import pkg_resources + +from nexustiles.dao.CassandraProxy import CassandraProxy + +config = ConfigParser.RawConfigParser() + +config.readfp(pkg_resources.resource_stream(__name__, "config/datastores.ini"), filename='datastores.ini') + +cass = CassandraProxy(config) + +tiles = cass.fetch_nexus_tiles('d9b5afe3-bd7f-3824-ad8a-d8d3b364689c') +print len(tiles[0].tile_blob) http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/data-access/tests/solr_update.py ---------------------------------------------------------------------- diff --git a/data-access/tests/solr_update.py b/data-access/tests/solr_update.py new file mode 100644 index 0000000..765262a --- /dev/null +++ b/data-access/tests/solr_update.py @@ -0,0 +1,23 @@ +""" +Copyright (c) 2016 Jet Propulsion Laboratory, +California Institute of Technology. 
All rights reserved +""" + +import solr + +solrcon = solr.Solr('http://%s/solr/%s' % ('localhost:8983', 'nexustiles')) + +ds = 'MXLDEPTH_ECCO_version4_release1' + +# print solrcon.select(q='dataset_s:%s' % ds, sort='id', cursorMark='*').results + + +params = {'q': 'dataset_s:%s' % ds, 'sort': 'id', 'cursorMark': '*', 'rows': 5000} +done = False +while not done: + response = solrcon.select(**params) + print len(response.results) + if params['cursorMark'] == response.nextCursorMark: + done = True + + params['cursorMark'] = response.nextCursorMark http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/data-access/tests/solrproxy_test.py ---------------------------------------------------------------------- diff --git a/data-access/tests/solrproxy_test.py b/data-access/tests/solrproxy_test.py new file mode 100644 index 0000000..eb49199 --- /dev/null +++ b/data-access/tests/solrproxy_test.py @@ -0,0 +1,62 @@ +""" +Copyright (c) 2016 Jet Propulsion Laboratory, +California Institute of Technology. 
All rights reserved +""" +import unittest +import ConfigParser + +import logging +import pkg_resources +import time + +from nexustiles.dao.SolrProxy import SolrProxy +from shapely.geometry import box + + +class TestQuery(unittest.TestCase): + def setUp(self): + config = ConfigParser.RawConfigParser() + + config.readfp(pkg_resources.resource_stream(__name__, "config/datastores.ini"), filename='datastores.ini') + + self.proxy = SolrProxy(config) + logging.basicConfig(level=logging.DEBUG) + + def find_distinct_section_specs_in_polygon_test(self): + result = self.proxy.find_distinct_bounding_boxes_in_polygon(box(-180, -90, 180, 90), + "MXLDEPTH_ECCO_version4_release1", + 1, time.time()) + + print len(result) + for r in sorted(result): + print r + + def find_all_tiles_in_polygon_with_spec_test(self): + result = self.proxy.find_all_tiles_in_polygon(box(-180, -90, 180, 90), + "AVHRR_OI_L4_GHRSST_NCEI", + fq={'sectionSpec_s:\"time:0:1,lat:100:120,lon:0:40\"'}, + rows=1, limit=1) + + print result + + def find_tiles_by_id_test(self): + result = self.proxy.find_tiles_by_id(['0cc95db3-293b-3553-b7a3-42920c3ffe4d'], ds="AVHRR_OI_L4_GHRSST_NCEI") + + print result + + def find_max_date_from_tiles_test(self): + result = self.proxy.find_max_date_from_tiles(["a764f12b-ceac-38d6-9d1d-89a6b68db32b"], + "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1", rows=1, limit=1) + + print result + + def find_tiles_by_exact_bounds_test(self): + result = self.proxy.find_tiles_by_exact_bounds(175.01, -42.68, 180.0, -40.2, + "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1", rows=5000) + + print len(result) + + def get_data_series_list_test(self): + result = self.proxy.get_data_series_list() + + print len(result) http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/.gitignore ---------------------------------------------------------------------- diff --git a/docker/.gitignore b/docker/.gitignore new file mode 100644 index 0000000..bdec9b9 --- /dev/null +++ b/docker/.gitignore @@ 
-0,0 +1 @@ +docker-run-commands.txt http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/README.md ---------------------------------------------------------------------- diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..b80ece9 --- /dev/null +++ b/docker/README.md @@ -0,0 +1 @@ +# NEXUS Docker \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/cassandra/Dockerfile ---------------------------------------------------------------------- diff --git a/docker/cassandra/Dockerfile b/docker/cassandra/Dockerfile new file mode 100644 index 0000000..59f9022 --- /dev/null +++ b/docker/cassandra/Dockerfile @@ -0,0 +1,5 @@ +FROM cassandra:2.2.8 + +RUN apt-get update && apt-get -y install git && rm -rf /var/lib/apt/lists/* + +RUN cd / && git clone https://github.com/dataplumber/nexus.git && cp -r /nexus/data-access/config/schemas/cassandra/nexustiles.cql /tmp/. && rm -rf /nexus http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/cassandra/README.md ---------------------------------------------------------------------- diff --git a/docker/cassandra/README.md b/docker/cassandra/README.md new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-admin/Dockerfile ---------------------------------------------------------------------- diff --git a/docker/ingest-admin/Dockerfile b/docker/ingest-admin/Dockerfile new file mode 100644 index 0000000..e11f71d --- /dev/null +++ b/docker/ingest-admin/Dockerfile @@ -0,0 +1,12 @@ +FROM nexusjpl/ingest-base + +USER root +RUN yum install -y https://archive.cloudera.com/cdh5/one-click-install/redhat/7/x86_64/cloudera-cdh-5-0.x86_64.rpm && \ + yum install -y zookeeper + +COPY nx-env.sh /usr/local/nx-env.sh +COPY nx-deploy-stream.sh /usr/local/nx-deploy-stream.sh + +USER springxd +ENTRYPOINT ["/usr/local/nexus-ingest.sh"] +CMD 
["--admin"] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-admin/README.md ---------------------------------------------------------------------- diff --git a/docker/ingest-admin/README.md b/docker/ingest-admin/README.md new file mode 100644 index 0000000..6da6db1 --- /dev/null +++ b/docker/ingest-admin/README.md @@ -0,0 +1,86 @@ +# ingest-admin Docker + +This can be used to start a spring-xd admin node. + +# Docker Compose + +Use the [docker-compose.yml](docker-compose.yml) file to start up mysql, redis, and xd-admin in one command. Example: + + MYSQL_PASSWORD=admin ZK_HOST_IP=10.200.10.1 KAFKA_HOST_IP=10.200.10.1 docker-compose up + +`MYSQL_PASSWORD` sets the password for a new MySQL user called `xd` when the MySQL database is initialized. +`ZK_HOST_IP` must be set to a valid IP address of a zookeeper host that will be used to manage Spring XD. +`KAFKA_HOST_IP` must be set to a valid IP address of a kafka broker that will be used for the transport layer of Spring XD + +# Docker Run + +This container relies on 4 external services that must already be running: MySQL, Redis, Zookeeper, and Kafka. + +To start the server use: + + docker run -it \ + -e "MYSQL_PORT_3306_TCP_ADDR=mysqldb" -e "MYSQL_PORT_3306_TCP_PORT=3306" \ + -e "MYSQL_USER=xd" -e "MYSQL_PASSWORD=admin" \ + -e "REDIS_ADDR=redis" -e "REDIS_PORT=6397" \ + -e "ZOOKEEPER_CONNECT=zkhost:2181" -e "ZOOKEEPER_XD_CHROOT=springxd" \ + -e "KAFKA_BROKERS=kafka1:9092" -e "KAFKA_ZKADDRESS=zkhost:2181/kafka" + --add-host="zkhost:10.200.10.1" \ + --add-host="kafka1:10.200.10.1" + --name xd-admin nexusjpl/ingest-admin + +This mode requires a number of Environment Variables to be defined. 
+ +##### `MYSQL_PORT_3306_TCP_ADDR` + +Address to a running MySQL service + +##### `MYSQL_PORT_3306_TCP_PORT` + +Port for running MySQL service + +##### `MYSQL_USER` + +Username to connnect to MySQL service + +##### `MYSQL_PASSWORD` + +Password for connecting to MySQL service + +##### `ZOOKEEPER_CONNECT` + +Zookeeper connect string. Can be a comma-delimmited list of host:port values. + +##### `ZOOKEEPER_XD_CHROOT` + +Zookeeper root node for spring-xd + +##### `REDIS_ADDR` + +Address to a running Redis service + +##### `REDIS_PORT` + +Port for running Redis service + +##### `KAFKA_BROKERS` + +Comma-delimmited list of host:port values which define the list of Kafka brokers used for transport. + +##### `KAFKA_ZKADDRESS` + +Specifies the ZooKeeper connection string in the form hostname:port where host and port are the host and port of a ZooKeeper server. + +The server may also have a ZooKeeper chroot path as part of its ZooKeeper connection string which puts its data under some path in the global ZooKeeper namespace. If so the consumer should use the same chroot path in its connection string. For example to give a chroot path of `/chroot/path` you would give the connection string as `hostname1:port1,hostname2:port2,hostname3:port3/chroot/path`. 
+ +# XD Shell + +## Using Docker Exec + +Once the xd-admin container is running you can use docker exec to start an XD Shell that communicates with the xd-admin server: + + docker exec -it xd-admin xd-shell + +## Using Standalone Container +You can use the springxd shell docker image to start a seperate container running XD shell connected to the xd-admin server: + + docker run -it --network container:xd-admin springxd/shell \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-admin/docker-compose.yml ---------------------------------------------------------------------- diff --git a/docker/ingest-admin/docker-compose.yml b/docker/ingest-admin/docker-compose.yml new file mode 100644 index 0000000..0f2c5b6 --- /dev/null +++ b/docker/ingest-admin/docker-compose.yml @@ -0,0 +1,73 @@ +version: '3' + +networks: + ingestnetwork: + nexus: + external: true + +services: + + mysqldb: + image: mysql:8 + hostname: mysqldb + expose: + - "3306" + environment: + - MYSQL_RANDOM_ROOT_PASSWORD=yes + - MYSQL_DATABASE=xdjob + - MYSQL_USER=xd + - MYSQL_PASSWORD=${MYSQL_PASSWORD} + networks: + - ingestnetwork + - nexus + deploy: + placement: + constraints: + - nexus.ingest-admin == true + + redis: + image: redis:3 + container_name: redis + expose: + - "6379" + networks: + - ingestnetwork + - nexus + deploy: + placement: + constraints: + - nexus.ingest-admin == true + + xd-admin: + image: nexusjpl/ingest-admin + container_name: xd-admin + command: [-a] + environment: + - MYSQL_PORT_3306_TCP_ADDR=mysqldb + - MYSQL_PORT_3306_TCP_PORT=3306 + - MYSQL_USER=xd + - MYSQL_PASSWORD=${MYSQL_PASSWORD} + - REDIS_ADDR=redis + - REDIS_PORT=6379 + - "ZOOKEEPER_CONNECT=zkhost:2181" + - ZOOKEEPER_XD_CHROOT=springxd + - "KAFKA_BROKERS=kafka1:9092" + - "KAFKA_ZKADDRESS=zkhost:2181/kafka" + depends_on: + - mysqldb + - redis + extra_hosts: + - "zkhost:$ZK_HOST_IP" + - "kafka1:$KAFKA_HOST_IP" + networks: + - ingestnetwork + - nexus + deploy: + 
placement: + constraints: + - nexus.ingest-admin == true + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-admin/nx-deploy-stream.sh ---------------------------------------------------------------------- diff --git a/docker/ingest-admin/nx-deploy-stream.sh b/docker/ingest-admin/nx-deploy-stream.sh new file mode 100755 index 0000000..e77483a --- /dev/null +++ b/docker/ingest-admin/nx-deploy-stream.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +. /usr/local/nx-env.sh + +if [ $# -gt 0 ]; then + while true; do + case "$1" in + --datasetName) + DATASET_NAME="$2" + shift 2 + ;; + --dataDirectory) + DATA_DIR="$2" + shift 2 + ;; + --variableName) + VARIABLE="$2" + shift 2 + ;; + --tilesDesired) + TILES_DESIRED="$2" + shift 2 + ;; + *) + break # out-of-args, stop looping + + ;; + esac + done +fi + +echo "stream create --name ${DATASET_NAME} --definition \"scan-for-avhrr-granules: file --dir=${DATA_DIR} --mode=ref --pattern=*.nc --maxMessages=1 --fixedDelay=1 | header-absolutefilepath: header-enricher --headers={\\\"absolutefilepath\\\":\\\"payload\\\"} | dataset-tiler --dimensions=lat,lon --tilesDesired=${TILES_DESIRED} | join-with-static-time: transform --expression=\\\"'time:0:1,'+payload.stream().collect(T(java.util.stream.Collectors).joining(';time:0:1,'))+';file://'+headers['absolutefilepath']\\\" | python-chain: tcpshell --command='python -u -m nexusxd.processorchain' --environment=CHAIN=nexusxd.tilereadingprocessor.read_grid_data:nexusxd.emptytilefilter.filter_empty_tiles:nexusxd.kelvintocelsius.transform:nexusxd.tilesumarizingprocessor.summarize_nexustile,VARIABLE=${VARIABLE},LATITUDE=lat,LONGITUDE=lon,TIME=time,READER=GRIDTILE,TEMP_DIR=/tmp,STORED_VAR_NAME=${VARIABLE} --bufferSize=1000000 --remoteReplyTimeout=360000 | add-id: script --script=file:///usr/local/spring-x d/current/xd-nexus-shared/generate-tile-id.groovy | 
set-dataset-name: script --script=file:///usr/local/spring-xd/current/xd-nexus-shared/set-dataset-name.groovy --variables='datasetname=${DATASET_NAME}' | nexus --cassandraContactPoints=${CASS_HOST} --cassandraKeyspace=nexustiles --solrCloudZkHost=${SOLR_CLOUD_ZK_HOST} --solrCollection=nexustiles --cassandraPort=${CASS_PORT}\"" > /tmp/stream-create + +xd-shell --cmdfile /tmp/stream-create + +echo "stream deploy --name ${DATASET_NAME} --properties module.python-chain.count=3,module.nexus.count=3" > /tmp/stream-deploy + +xd-shell --cmdfile /tmp/stream-deploy http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-admin/nx-env.sh ---------------------------------------------------------------------- diff --git a/docker/ingest-admin/nx-env.sh b/docker/ingest-admin/nx-env.sh new file mode 100755 index 0000000..93ee22f --- /dev/null +++ b/docker/ingest-admin/nx-env.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +export SOLR_CLOUD_ZK_HOST=zk1:2181,zk2:2181,zk3:2181/solr +export CASS_HOST=cassandra1,cassandra2,cassandra3 +export CASS_PORT=9042 http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-base/Dockerfile ---------------------------------------------------------------------- diff --git a/docker/ingest-base/Dockerfile b/docker/ingest-base/Dockerfile new file mode 100644 index 0000000..e612ebe --- /dev/null +++ b/docker/ingest-base/Dockerfile @@ -0,0 +1,62 @@ +FROM nexusjpl/nexusbase + +WORKDIR /tmp + +RUN yum -y install unzip nc + +# Create conda environment and install dependencies +RUN conda create -y --name nexus-xd-python-modules python && \ + source activate nexus-xd-python-modules && \ + conda install -y scipy=0.18.1 && \ + conda install -y -c conda-forge nco=4.6.4 netcdf4=1.2.7 + +# Install Spring XD +RUN groupadd -r springxd && adduser -r -g springxd springxd + +WORKDIR /usr/local/spring-xd +RUN wget -q 
"http://repo.spring.io/libs-release/org/springframework/xd/spring-xd/1.3.1.RELEASE/spring-xd-1.3.1.RELEASE-dist.zip" && \ + unzip spring-xd-1.3.1.RELEASE-dist.zip && \ + rm spring-xd-1.3.1.RELEASE-dist.zip && \ + ln -s spring-xd-1.3.1.RELEASE current && \ + mkdir current/xd/lib/none + +RUN wget -q "https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.0.8.tar.gz" && \ + tar -zxf mysql-connector-java-5.0.8.tar.gz && \ + mv mysql-connector-java-5.0.8/mysql-connector-java-5.0.8-bin.jar current/xd/lib && \ + rm -rf mysql-connector-java-5.0.8 && \ + rm -f mysql-connector-java-5.0.8.tar.gz && \ + chown -R springxd:springxd spring-xd-1.3.1.RELEASE + +USER springxd +ENV PATH $PATH:/usr/local/spring-xd/current/xd/bin:/usr/local/spring-xd/current/shell/bin:/usr/local/spring-xd/current/zookeeper/bin +COPY xd-container-logback.groovy /usr/local/spring-xd/current/xd/config/xd-container-logback.groovy +COPY xd-singlenode-logback.groovy /usr/local/spring-xd/current/xd/config/xd-singlenode-logback.groovy +VOLUME ["/usr/local/spring-xd/current/xd/config"] +EXPOSE 9393 + +# Configure Java Library Repositories +ENV PATH $PATH:/usr/local/anaconda2/bin +ENV M2_HOME /usr/local/apache-maven +ENV M2 $M2_HOME/bin +ENV PATH $PATH:$M2 +USER root +COPY maven_settings.xml $M2_HOME/conf/settings.xml +COPY ivy_settings.xml /usr/local/repositories/.groovy/grapeConfig.xml +RUN mkdir -p /usr/local/repositories/.m2 && mkdir -p /usr/local/repositories/.groovy && chown -R springxd:springxd /usr/local/repositories + +# ######################## +# # nexus-ingest code # +# ######################## +WORKDIR /tmp +RUN pwd +COPY install-custom-software.sh /tmp/install-custom-software.sh +RUN /bin/bash install-custom-software.sh +RUN chown -R springxd:springxd /usr/local/spring-xd/spring-xd-1.3.1.RELEASE && \ + chown -R springxd:springxd /usr/local/anaconda2/envs/nexus-xd-python-modules/ && \ + chown -R springxd:springxd /usr/local/repositories +VOLUME ["/usr/local/data/nexus"] + +COPY 
nexus-ingest.sh /usr/local/nexus-ingest.sh +USER springxd +ENTRYPOINT ["/usr/local/nexus-ingest.sh"] +CMD ["--help"] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-base/README.md ---------------------------------------------------------------------- diff --git a/docker/ingest-base/README.md b/docker/ingest-base/README.md new file mode 100644 index 0000000..f86cd8f --- /dev/null +++ b/docker/ingest-base/README.md @@ -0,0 +1,3 @@ +# ingest-base + +This file is used as a base image for the other ingest-* containers in the nexusjpl organization. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-base/install-custom-software.sh ---------------------------------------------------------------------- diff --git a/docker/ingest-base/install-custom-software.sh b/docker/ingest-base/install-custom-software.sh new file mode 100755 index 0000000..8276fe4 --- /dev/null +++ b/docker/ingest-base/install-custom-software.sh @@ -0,0 +1,82 @@ +scriptdir=`dirname $0` + +homedir="/usr/local/spring-xd/current" +condaenv="nexus-xd-python-modules" + +pushd $homedir +mkdir nexus +pushd nexus +git init +git pull https://github.com/dataplumber/nexus.git +popd + +source activate $condaenv + +# Install spring-xd python module +pushd nexus/nexus-ingest/spring-xd-python +python setup.py install --force +popd + +# Install protobuf generated artifacts +pushd nexus/nexus-ingest/nexus-messages +./gradlew clean build writeNewPom + +pomfile=`find build/poms/*.xml` +jarfile=`find build/libs/*.jar` +mvn install:install-file -DpomFile=$pomfile -Dfile=$jarfile + +pushd build/python/nexusproto +python setup.py install --force +popd +popd + +# Install ingestion modules +pushd nexus/nexus-ingest/nexus-xd-python-modules +python setup.py install --force +popd + +# Install shared Groovy scripts +pushd nexus/nexus-ingest/groovy-scripts +mkdir $homedir/xd-nexus-shared +cp *.groovy 
$homedir/xd-nexus-shared +popd + +# Start singlenode so we can interact with it +nohup xd-singlenode --hadoopDistro none > /dev/null 2>&1 & + +# Delete all streams in Spring XD so we can update the custom modules +touch /tmp/xdcommand +echo stream all destroy --force > /tmp/xdcommand +until xd-shell --cmdfile /tmp/xdcommand; +do + sleep 1 +done + +# Build and upload dataset-tiler +pushd nexus/nexus-ingest/dataset-tiler +./gradlew clean build +jarfile=`find build/libs/*.jar` +touch /tmp/moduleupload +echo module upload --type processor --name dataset-tiler --file $jarfile --force > /tmp/xdcommand +xd-shell --cmdfile /tmp/xdcommand +popd + +# Build and upload tcp-shell +pushd nexus/nexus-ingest/tcp-shell +./gradlew clean build +jarfile=`find build/libs/*.jar` +touch /tmp/moduleupload +echo module upload --type processor --name tcpshell --file $jarfile --force > /tmp/xdcommand +xd-shell --cmdfile /tmp/xdcommand +popd + +# Build and upload nexus-sink +pushd nexus/nexus-ingest/nexus-sink +./gradlew clean build +jarfile=`find build/libs/*.jar` +touch /tmp/moduleupload +echo module upload --type sink --name nexus --file $jarfile --force > /tmp/xdcommand +xd-shell --cmdfile /tmp/xdcommand +popd + +popd \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-base/ivy_settings.xml ---------------------------------------------------------------------- diff --git a/docker/ingest-base/ivy_settings.xml b/docker/ingest-base/ivy_settings.xml new file mode 100644 index 0000000..3cbe7c9 --- /dev/null +++ b/docker/ingest-base/ivy_settings.xml @@ -0,0 +1,31 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<ivysettings> + <settings defaultResolver="downloadGrapes"/> + <resolvers> + <chain name="downloadGrapes" returnFirst="true"> + <filesystem name="cachedGrapes"> + <ivy pattern="/usr/local/repositories/.groovy/grapes/[organisation]/[module]/ivy-[revision].xml"/> + <artifact pattern="/usr/local/repositories/.groovy/grapes/[organisation]/[module]/[type]s/[artifact]-[revision](-[classifier]).[ext]"/> + </filesystem> + <ibiblio name="localm2" root="file:/usr/local/repositories/.m2" checkmodified="true" changingPattern=".*" changingMatcher="regexp" m2compatible="true"/> + <!-- todo add 'endorsed groovy extensions' resolver here --> + <ibiblio name="jcenter" root="https://jcenter.bintray.com/" m2compatible="true"/> + <ibiblio name="ibiblio" m2compatible="true"/> + </chain> + </resolvers> +</ivysettings> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/docker/ingest-base/maven_settings.xml ---------------------------------------------------------------------- diff --git a/docker/ingest-base/maven_settings.xml b/docker/ingest-base/maven_settings.xml new file mode 100644 index 0000000..aa97c4c --- /dev/null +++ b/docker/ingest-base/maven_settings.xml @@ -0,0 +1,256 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. 
See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<!-- + | This is the configuration file for Maven. It can be specified at two levels: + | + | 1. User Level. This settings.xml file provides configuration for a single user, + | and is normally provided in ${user.home}/.m2/settings.xml. + | + | NOTE: This location can be overridden with the CLI option: + | + | -s /path/to/user/settings.xml + | + | 2. Global Level. This settings.xml file provides configuration for all Maven + | users on a machine (assuming they're all using the same Maven + | installation). It's normally provided in + | ${maven.home}/conf/settings.xml. + | + | NOTE: This location can be overridden with the CLI option: + | + | -gs /path/to/global/settings.xml + | + | The sections in this sample file are intended to give you a running start at + | getting the most out of your Maven installation. Where appropriate, the default + | values (values used when the setting is not specified) are provided. + | + |--> +<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd"> + <!-- localRepository + | The path to the local repository maven will use to store artifacts. 
+ | + | Default: ${user.home}/.m2/repository --> + <localRepository>/usr/local/repositories/.m2</localRepository> + + <!-- interactiveMode + | This will determine whether maven prompts you when it needs input. If set to false, + | maven will use a sensible default value, perhaps based on some other setting, for + | the parameter in question. + | + | Default: true + <interactiveMode>true</interactiveMode> + --> + + <!-- offline + | Determines whether maven should attempt to connect to the network when executing a build. + | This will have an effect on artifact downloads, artifact deployment, and others. + | + | Default: false + <offline>false</offline> + --> + + <!-- pluginGroups + | This is a list of additional group identifiers that will be searched when resolving plugins by their prefix, i.e. + | when invoking a command line like "mvn prefix:goal". Maven will automatically add the group identifiers + | "org.apache.maven.plugins" and "org.codehaus.mojo" if these are not already contained in the list. + |--> + <pluginGroups> + <!-- pluginGroup + | Specifies a further group identifier to use for plugin lookup. + <pluginGroup>com.your.plugins</pluginGroup> + --> + </pluginGroups> + + <!-- proxies + | This is a list of proxies which can be used on this machine to connect to the network. + | Unless otherwise specified (by system property or command-line switch), the first proxy + | specification in this list marked as active will be used. + |--> + <proxies> + <!-- proxy + | Specification for one proxy, to be used in connecting to the network. + | + <proxy> + <id>optional</id> + <active>true</active> + <protocol>http</protocol> + <username>proxyuser</username> + <password>proxypass</password> + <host>proxy.host.net</host> + <port>80</port> + <nonProxyHosts>local.net|some.host.com</nonProxyHosts> + </proxy> + --> + </proxies> + + <!-- servers + | This is a list of authentication profiles, keyed by the server-id used within the system. 
+ | Authentication profiles can be used whenever maven must make a connection to a remote server. + |--> + <servers> + <!-- server + | Specifies the authentication information to use when connecting to a particular server, identified by + | a unique name within the system (referred to by the 'id' attribute below). + | + | NOTE: You should either specify username/password OR privateKey/passphrase, since these pairings are + | used together. + | + <server> + <id>deploymentRepo</id> + <username>repouser</username> + <password>repopwd</password> + </server> + --> + + <!-- Another sample, using keys to authenticate. + <server> + <id>siteServer</id> + <privateKey>/path/to/private/key</privateKey> + <passphrase>optional; leave empty if not used.</passphrase> + </server> + --> + </servers> + + <!-- mirrors + | This is a list of mirrors to be used in downloading artifacts from remote repositories. + | + | It works like this: a POM may declare a repository to use in resolving certain artifacts. + | However, this repository may have problems with heavy traffic at times, so people have mirrored + | it to several places. + | + | That repository definition will have a unique id, so we can create a mirror reference for that + | repository, to be used as an alternate download site. The mirror site will be the preferred + | server for that repository. + |--> + <mirrors> + <!-- mirror + | Specifies a repository mirror site to use instead of a given repository. The repository that + | this mirror serves has an ID that matches the mirrorOf element of this mirror. IDs are used + | for inheritance and direct lookup purposes, and must be unique across the set of mirrors. 
+ | + <mirror> + <id>mirrorId</id> + <mirrorOf>repositoryId</mirrorOf> + <name>Human Readable Name for this Mirror.</name> + <url>http://my.repository.com/repo/path</url> + </mirror> + --> + </mirrors> + + <!-- profiles + | This is a list of profiles which can be activated in a variety of ways, and which can modify + | the build process. Profiles provided in the settings.xml are intended to provide local machine- + | specific paths and repository locations which allow the build to work in the local environment. + | + | For example, if you have an integration testing plugin - like cactus - that needs to know where + | your Tomcat instance is installed, you can provide a variable here such that the variable is + | dereferenced during the build process to configure the cactus plugin. + | + | As noted above, profiles can be activated in a variety of ways. One way - the activeProfiles + | section of this document (settings.xml) - will be discussed later. Another way essentially + | relies on the detection of a system property, either matching a particular value for the property, + | or merely testing its existence. Profiles can also be activated by JDK version prefix, where a + | value of '1.4' might activate a profile when the build is executed on a JDK version of '1.4.2_07'. + | Finally, the list of active profiles can be specified directly from the command line. + | + | NOTE: For profiles defined in the settings.xml, you are restricted to specifying only artifact + | repositories, plugin repositories, and free-form properties to be used as configuration + | variables for plugins in the POM. + | + |--> + <profiles> + <!-- profile + | Specifies a set of introductions to the build process, to be activated using one or more of the + | mechanisms described above. For inheritance purposes, and to activate profiles via <activatedProfiles/> + | or the command line, profiles have to have an ID that is unique. 
+ | + | An encouraged best practice for profile identification is to use a consistent naming convention + | for profiles, such as 'env-dev', 'env-test', 'env-production', 'user-jdcasey', 'user-brett', etc. + | This will make it more intuitive to understand what the set of introduced profiles is attempting + | to accomplish, particularly when you only have a list of profile id's for debug. + | + | This profile example uses the JDK version to trigger activation, and provides a JDK-specific repo. + <profile> + <id>jdk-1.4</id> + + <activation> + <jdk>1.4</jdk> + </activation> + + <repositories> + <repository> + <id>jdk14</id> + <name>Repository for JDK 1.4 builds</name> + <url>http://www.myhost.com/maven/jdk14</url> + <layout>default</layout> + <snapshotPolicy>always</snapshotPolicy> + </repository> + </repositories> + </profile> + --> + + <!-- + | Here is another profile, activated by the system property 'target-env' with a value of 'dev', + | which provides a specific path to the Tomcat instance. To use this, your plugin configuration + | might hypothetically look like: + | + | ... + | <plugin> + | <groupId>org.myco.myplugins</groupId> + | <artifactId>myplugin</artifactId> + | + | <configuration> + | <tomcatLocation>${tomcatPath}</tomcatLocation> + | </configuration> + | </plugin> + | ... + | + | NOTE: If you just wanted to inject this configuration whenever someone set 'target-env' to + | anything, you could just leave off the <value/> inside the activation-property. + | + <profile> + <id>env-dev</id> + + <activation> + <property> + <name>target-env</name> + <value>dev</value> + </property> + </activation> + + <properties> + <tomcatPath>/path/to/tomcat/instance</tomcatPath> + </properties> + </profile> + --> + </profiles> + + <!-- activeProfiles + | List of profiles that are active for all builds. 
+ | + <activeProfiles> + <activeProfile>alwaysActiveProfile</activeProfile> + <activeProfile>anotherAlwaysActiveProfile</activeProfile> + </activeProfiles> + --> +</settings> \ No newline at end of file
