Added slice-by-dimension file slicer and tests
Project: http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/commit/39ea2ef3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/tree/39ea2ef3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/diff/39ea2ef3 Branch: refs/heads/master Commit: 39ea2ef3fb34c173d1ac3bd9baa4a322158c6a7e Parents: 9288979 Author: Frank Greguska <[email protected]> Authored: Thu Jan 4 14:59:03 2018 -0800 Committer: Frank Greguska <[email protected]> Committed: Thu Jan 4 14:59:03 2018 -0800 ---------------------------------------------------------------------- .gitattributes | 2 + .../datatiler/SliceFileByDimension.java | 110 +++++++++++++++++++ .../datatiler/NetCDFItemReaderTest.java | 26 +++-- .../datatiler/SliceFileByDimensionTest.java | 96 ++++++++++++++++ .../datatiler/SliceFileByTilesDesiredTest.java | 23 ++++ ...HRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.0.nc | Bin 1057327 -> 132 bytes ...CMP_Wind_Analysis_20050101_V02.0_L3.0_RSS.nc | 3 + ...SMAP_L2B_SSS_04892_20160101T005507_R13080.h5 | 3 + ...010301_metopb_00588_eps_o_coa_2101_ovw.l2.nc | 3 + 9 files changed, 256 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/.gitattributes ---------------------------------------------------------------------- diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b7b347e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.nc filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/src/main/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByDimension.java ---------------------------------------------------------------------- diff --git a/src/main/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByDimension.java 
b/src/main/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByDimension.java new file mode 100644 index 0000000..003edaa --- /dev/null +++ b/src/main/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByDimension.java @@ -0,0 +1,110 @@ +/* + * **************************************************************************** + * Copyright (c) 2016 Jet Propulsion Laboratory, + * California Institute of Technology. All rights reserved + * ****************************************************************************/ +package gov.nasa.jpl.nexus.ningester.datatiler; + +import com.google.common.collect.Sets; +import ucar.nc2.Dimension; +import ucar.nc2.Variable; +import ucar.nc2.dataset.NetcdfDataset; + +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +class SliceFileByDimension implements FileSlicer { + + private String sliceByDimension; + private List<String> dimensions; + private String dimensionNamePrefix; + + public void setDimensions(List<String> dims) { + this.dimensions = dims; + } + + public void setSliceByDimension(String sliceBy) { + this.sliceByDimension = sliceBy; + } + + public void setDimensionNamePrefix(String dimensionNamePrefix) { + this.dimensionNamePrefix = dimensionNamePrefix; + } + + public List<String> generateSlices(File inputfile) throws IOException { + + boolean isInteger = false; + try { + Integer.parseInt(this.sliceByDimension); + isInteger = true; + } catch (NumberFormatException e) { + //ignore + } + return (isInteger) ? indexedDimensionSlicing(inputfile) : namedDimensionSlicing(inputfile); + } + + List<String> indexedDimensionSlicing(File inputfile) throws IOException { + Map<String, Integer> dimensionNameToLength; + try (NetcdfDataset ds = NetcdfDataset.openDataset(inputfile.getAbsolutePath())) { + + + // Because this is indexed-based dimension slicing, the dimensions are assumed to be unlimited with no names (ie. 
ds.dimensions == []) + // Therefore, we need to find a 'representative' variable with dimensions that we can inspect and work with + // 'lat' and 'lon' are common variable names in the datasets we work with. So try to find one of those first + // Otherwise, just find the first variable that has the same number of dimensions as was given in this.dimensions + List<String> commonVariableNames = Arrays.asList("lat", "latitude", "lon", "longitude"); + Optional<Variable> var = ds.getVariables().stream() + .filter(variable -> commonVariableNames.contains(variable.getShortName().toLowerCase()) + || variable.getDimensions().size() == this.dimensions.size()) + .findFirst(); + + assert var.isPresent() : "Could not find a variable in " + inputfile.getName() + " with " + dimensions.size() + " dimension(s)."; + + dimensionNameToLength = IntStream.range(0, this.dimensions.size()).boxed() + .collect(Collectors.toMap(dimIndex -> this.dimensionNamePrefix + dimIndex, dimIndex -> var.get().getDimension(dimIndex).getLength())); + } + + return generateTileBoundrySlices(this.dimensionNamePrefix + this.sliceByDimension, dimensionNameToLength); + + } + + List<String> namedDimensionSlicing(File inputfile) throws IOException { + Map<String, Integer> dimensionNameToLength; + try (NetcdfDataset ds = NetcdfDataset.openDataset(inputfile.getAbsolutePath())) { + + dimensionNameToLength = ds.getDimensions().stream() + .filter(dimension -> this.dimensions.contains(dimension.getShortName())) + .collect(Collectors.toMap(Dimension::getShortName, Dimension::getLength)); + } + + return generateTileBoundrySlices(this.sliceByDimension, dimensionNameToLength); + } + + List<String> generateTileBoundrySlices(String sliceByDimension, Map<String, Integer> dimensionNameToLength) { + + List<Set<String>> dimensionBounds = dimensionNameToLength.entrySet().stream() + .map(stringIntegerEntry -> { + String dimensionName = stringIntegerEntry.getKey(); + Integer lengthOfDimension = stringIntegerEntry.getValue(); + 
Integer stepSize = (dimensionName.equals(sliceByDimension)) ? 1 : lengthOfDimension; + + Set<String> bounds = new LinkedHashSet<>(); + for (int i = 0; i < lengthOfDimension; i += stepSize) { + bounds.add( + dimensionName + ":" + + i + ":" + + (i + stepSize >= lengthOfDimension ? lengthOfDimension : i + stepSize)); + } + return bounds; + }).collect(Collectors.toList()); + + return Sets.cartesianProduct(dimensionBounds) + .stream() + .map(tileSpecAsList -> tileSpecAsList.stream().collect(Collectors.joining(","))) + .collect(Collectors.toList()); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/NetCDFItemReaderTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/NetCDFItemReaderTest.java b/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/NetCDFItemReaderTest.java index 57d4c89..d677a45 100644 --- a/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/NetCDFItemReaderTest.java +++ b/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/NetCDFItemReaderTest.java @@ -6,18 +6,18 @@ import org.springframework.batch.item.ExecutionContext; import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.Resource; -import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -public class NetCDFItemReaderTest { +public class NetCDFItemReaderTest { @Test - public void testOpen() throws IOException { + public void testOpen() { SliceFileByTilesDesired slicer = new SliceFileByTilesDesired(); slicer.setTilesDesired(5184); slicer.setDimensions(Arrays.asList("lat", "lon")); @@ -32,7 +32,7 @@ public class NetCDFItemReaderTest { } @Test - public 
void testRead() throws Exception { + public void testReadOne() throws Exception { SliceFileByTilesDesired slicer = new SliceFileByTilesDesired(); slicer.setTilesDesired(5184); slicer.setDimensions(Arrays.asList("lat", "lon")); @@ -52,9 +52,11 @@ public class NetCDFItemReaderTest { } @Test - public void testReadWithTime() throws Exception { + public void testReadAll() { + Integer tilesDesired = 5184; + SliceFileByTilesDesired slicer = new SliceFileByTilesDesired(); - slicer.setTilesDesired(5184); + slicer.setTilesDesired(tilesDesired); slicer.setDimensions(Arrays.asList("lat", "lon")); slicer.setTimeDimension("time"); @@ -65,10 +67,14 @@ public class NetCDFItemReaderTest { ExecutionContext context = new ExecutionContext(); reader.open(context); - NexusContent.NexusTile result = reader.read(); + List<NexusContent.NexusTile> results = new ArrayList<>(); + NexusContent.NexusTile result; + while ((result = reader.read()) != null) { + results.add(result); + } - assertThat(result.getSummary().getSectionSpec(), is("time:0:1,lat:0:10,lon:0:20")); - assertThat(result.getSummary().getGranule(), is(testResource.getURL().toString())); + assertThat(results.size(), is(tilesDesired)); } + } http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByDimensionTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByDimensionTest.java b/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByDimensionTest.java new file mode 100644 index 0000000..26d6059 --- /dev/null +++ b/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByDimensionTest.java @@ -0,0 +1,96 @@ +/* + * **************************************************************************** + * Copyright (c) 2016 Jet Propulsion Laboratory, + * California Institute of Technology. 
All rights reserved + * ****************************************************************************/ +package gov.nasa.jpl.nexus.ningester.datatiler; + +import org.junit.Test; +import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; + +import java.io.IOException; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static junit.framework.Assert.assertEquals; +import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +public class SliceFileByDimensionTest { + + @Test + public void testGenerateTileBoundrySlices() { + + SliceFileByDimension slicer = new SliceFileByDimension(); + slicer.setSliceByDimension("NUMROWS"); + + Map<String, Integer> dimensionNameToLength = new LinkedHashMap<>(); + dimensionNameToLength.put("NUMROWS", 3163); + dimensionNameToLength.put("NUMCELLS", 82); + + List<String> result = slicer.generateTileBoundrySlices("NUMROWS", dimensionNameToLength); + + assertEquals(3163, result.size()); + + assertThat(result, hasItems("NUMROWS:0:1,NUMCELLS:0:82", "NUMROWS:1:2,NUMCELLS:0:82", "NUMROWS:3162:3163,NUMCELLS:0:82")); + + } + + @Test + public void testGenerateTileBoundrySlices2() { + + SliceFileByDimension slicer = new SliceFileByDimension(); + slicer.setSliceByDimension("NUMROWS"); + + Map<String, Integer> dimensionNameToLength = new LinkedHashMap<>(); + dimensionNameToLength.put("NUMROWS", 2); + dimensionNameToLength.put("NUMCELLS", 82); + + List<String> result = slicer.generateTileBoundrySlices("NUMROWS", dimensionNameToLength); + + assertEquals(2, result.size()); + + assertThat(result, hasItems("NUMROWS:0:1,NUMCELLS:0:82", "NUMROWS:1:2,NUMCELLS:0:82")); + + } + + @Test + public void testGenerateSlicesByInteger() throws IOException { + + + Integer expectedTiles = 1624; + + SliceFileByDimension slicer = new SliceFileByDimension(); + 
slicer.setDimensions(Arrays.asList("0", "1")); + slicer.setSliceByDimension("1"); + + Resource testResource = new ClassPathResource("granules/SMAP_L2B_SSS_04892_20160101T005507_R13080.h5"); + + List<String> results = slicer.generateSlices(testResource.getFile()); + + assertThat(results.size(), is(expectedTiles)); + + } + + @Test + public void testGenerateSlicesByName() throws IOException { + + + Integer expectedTiles = 3163; + + SliceFileByDimension slicer = new SliceFileByDimension(); + slicer.setDimensions(Arrays.asList("NUMROWS", "NUMCELLS")); + slicer.setSliceByDimension("NUMROWS"); + + Resource testResource = new ClassPathResource("granules/ascat_20121029_010301_metopb_00588_eps_o_coa_2101_ovw.l2.nc"); + + List<String> results = slicer.generateSlices(testResource.getFile()); + + assertThat(results.size(), is(expectedTiles)); + + } +} http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByTilesDesiredTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByTilesDesiredTest.java b/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByTilesDesiredTest.java index ebb2c44..ff0077c 100644 --- a/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByTilesDesiredTest.java +++ b/src/test/java/gov/nasa/jpl/nexus/ningester/datatiler/SliceFileByTilesDesiredTest.java @@ -6,12 +6,17 @@ package gov.nasa.jpl.nexus.ningester.datatiler; import org.junit.Test; +import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; +import java.io.IOException; +import java.util.Arrays; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; import static 
org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; @@ -153,4 +158,22 @@ public class SliceFileByTilesDesiredTest { assertThat(result, hasItems("time:0:1,lat:0:249,lon:0:500", "time:0:1,lat:0:249,lon:500:1000", "time:0:1,lat:17928:17999,lon:35500:36000")); } + + @Test + public void testGenerateSlicesCcmp() throws IOException { + Integer tilesDesired = 270; + Integer expectedTiles = 289 * 4; // 4 time slices and 289 tiles per time slice + + SliceFileByTilesDesired slicer = new SliceFileByTilesDesired(); + slicer.setTilesDesired(tilesDesired); + slicer.setDimensions(Arrays.asList("latitude", "longitude")); + slicer.setTimeDimension("time"); + + Resource testResource = new ClassPathResource("granules/CCMP_Wind_Analysis_20050101_V02.0_L3.0_RSS.nc"); + + List<String> results = slicer.generateSlices(testResource.getFile()); + + assertThat(results.size(), is(expectedTiles)); + + } } http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/src/test/resources/granules/20050101120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.0.nc ---------------------------------------------------------------------- diff --git a/src/test/resources/granules/20050101120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.0.nc b/src/test/resources/granules/20050101120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.0.nc index 4935c81..6996466 100644 Binary files a/src/test/resources/granules/20050101120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.0.nc and b/src/test/resources/granules/20050101120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.0.nc differ http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/src/test/resources/granules/CCMP_Wind_Analysis_20050101_V02.0_L3.0_RSS.nc ---------------------------------------------------------------------- diff --git a/src/test/resources/granules/CCMP_Wind_Analysis_20050101_V02.0_L3.0_RSS.nc 
b/src/test/resources/granules/CCMP_Wind_Analysis_20050101_V02.0_L3.0_RSS.nc new file mode 100644 index 0000000..2a26eda --- /dev/null +++ b/src/test/resources/granules/CCMP_Wind_Analysis_20050101_V02.0_L3.0_RSS.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f2e9cadd9fb34eab7027c127930a3774a1741948bb0818f30b475d6a13c707 +size 27115416 http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/src/test/resources/granules/SMAP_L2B_SSS_04892_20160101T005507_R13080.h5 ---------------------------------------------------------------------- diff --git a/src/test/resources/granules/SMAP_L2B_SSS_04892_20160101T005507_R13080.h5 b/src/test/resources/granules/SMAP_L2B_SSS_04892_20160101T005507_R13080.h5 new file mode 100644 index 0000000..8a5d950 --- /dev/null +++ b/src/test/resources/granules/SMAP_L2B_SSS_04892_20160101T005507_R13080.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf7dc1af914d0be1ba27cf9bc9b77964a587e2273475185f8428e3f65f7cd42 +size 18672352 http://git-wip-us.apache.org/repos/asf/incubator-sdap-ningester/blob/39ea2ef3/src/test/resources/granules/ascat_20121029_010301_metopb_00588_eps_o_coa_2101_ovw.l2.nc ---------------------------------------------------------------------- diff --git a/src/test/resources/granules/ascat_20121029_010301_metopb_00588_eps_o_coa_2101_ovw.l2.nc b/src/test/resources/granules/ascat_20121029_010301_metopb_00588_eps_o_coa_2101_ovw.l2.nc new file mode 100644 index 0000000..029e9de --- /dev/null +++ b/src/test/resources/granules/ascat_20121029_010301_metopb_00588_eps_o_coa_2101_ovw.l2.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:359f98fd94e90747f9d84af37387fb66e5c222e37fa45221c870a35dd02fad38 +size 8305456
