Author: mattmann
Date: Mon Jan 16 03:26:27 2012
New Revision: 1231816
URL: http://svn.apache.org/viewvc?rev=1231816&view=rev
Log:
- fix for OODT-368 Refactoring of metadata extraction functionality for
opendapps module (contributed by Luca Cinquini)
Added:
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java
Modified:
oodt/trunk/CHANGES.txt
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java
Modified: oodt/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/oodt/trunk/CHANGES.txt?rev=1231816&r1=1231815&r2=1231816&view=diff
==============================================================================
--- oodt/trunk/CHANGES.txt (original)
+++ oodt/trunk/CHANGES.txt Mon Jan 16 03:26:27 2012
@@ -4,6 +4,9 @@ Apache OODT Change Log
Release 0.4: Current Development
--------------------------------------------
+* OODT-368 Refactoring of metadata extraction functionality for
+ opendapps module (Luca Cinquini, mattmann)
+
* OODT-366 Extension to opendapps module to extract ALL variables
in DDS stream (Luca Cinquini, mattmann)
Modified:
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java?rev=1231816&r1=1231815&r2=1231816&view=diff
==============================================================================
---
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java
(original)
+++
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java
Mon Jan 16 03:26:27 2012
@@ -19,41 +19,24 @@ package org.apache.oodt.opendapps;
//JDK imports
import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.TimeZone;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
-//OODT imports
import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.opendapps.extractors.MetadataExtractor;
+import org.apache.oodt.opendapps.extractors.ThreddsMetadataExtractor;
-//Spring imports
-import org.springframework.util.StringUtils;
-
-//OPeNDAP/THREDDS imports
import thredds.catalog.InvAccess;
import thredds.catalog.InvCatalogRef;
import thredds.catalog.InvDataset;
-import thredds.catalog.InvDocumentation;
-import thredds.catalog.InvProperty;
import thredds.catalog.InvService;
import thredds.catalog.ServiceType;
-import thredds.catalog.ThreddsMetadata.Contributor;
-import thredds.catalog.ThreddsMetadata.GeospatialCoverage;
-import thredds.catalog.ThreddsMetadata.Range;
-import thredds.catalog.ThreddsMetadata.Source;
-import thredds.catalog.ThreddsMetadata.Variable;
-import thredds.catalog.ThreddsMetadata.Variables;
-import thredds.catalog.ThreddsMetadata.Vocab;
import thredds.catalog.crawl.CatalogCrawler;
-import ucar.nc2.units.DateType;
-import ucar.unidata.geoloc.LatLonRect;
/**
* Crawls a catalog and returns all the datasets and their references.
@@ -107,7 +90,12 @@ public class DatasetCrawler implements C
if (service.getServiceType()==ServiceType.OPENDAP) {
LOG.log(Level.INFO, "Found OpenDAP access URL: "+
single.getUrlPath());
String opendapurl = this.datasetURL + single.getUrlPath();
- this.datasetMet.put(opendapurl, this.extractDatasetMet(dd));
+ // extract metadata from THREDDS catalog
+ MetadataExtractor extractor = new ThreddsMetadataExtractor(dd);
+ Metadata met = new Metadata();
+ extractor.extract(met);
+ // index metadata by opendap access URL
+ this.datasetMet.put(opendapurl, met);
this.urls.add(opendapurl);
break;
}
@@ -134,191 +122,4 @@ public class DatasetCrawler implements C
return this.datasetMet;
}
- private Metadata extractDatasetMet(InvDataset dataset) {
-
- LOG.log(Level.INFO, "Crawling catalog URL=" + dataset.getCatalogUrl()+"
dataset ID="+dataset.getID());
-
- Metadata met = new Metadata();
- this.addIfNotNull(met, "Authority", dataset.getAuthority());
- this.addIfNotNull(met, "CatalogUrl", dataset.getCatalogUrl());
- this.addIfNotNull(met, "DatasetFullName", dataset.getFullName());
- if (dataset.getContributors() != null) {
- for (Contributor contributor : dataset.getContributors()) {
- this.addIfNotNull(met, "Contributor", contributor.getName());
- }
- }
-
- if (dataset.getCreators() != null) {
- for (Source source : dataset.getCreators()) {
- this.addIfNotNull(met, "Creator", source.getName());
- }
- }
-
- if (dataset.getDataFormatType() != null){
- this.addIfNotNull(met, "DataFormatType", dataset.getDataFormatType()
- .toString());
- }
-
- if (dataset.getDataType() != null){
- this.addIfNotNull(met, "DataType", dataset.getDataType().toString());
- }
-
- if (dataset.getDates() != null) {
- for (DateType dateType : dataset.getDates()) {
- String dateString = null;
- try {
- dateString = toISO8601(dateType.getDate());
- } catch (Exception e) {
- LOG.log(Level.WARNING, "Error converting date: ["
- + dateType.getDate() + "]: Message: " + e.getMessage());
- }
- this.addIfNotNull(met, "Dates", dateString);
- }
- }
-
- if (dataset.getDocumentation() != null) {
- for (InvDocumentation doc : dataset.getDocumentation()) {
- this.addIfNotNull(met, "Documentation", doc.getInlineContent());
- }
- }
-
- this.addIfNotNull(met, "FullName", dataset.getFullName());
- GeospatialCoverage geoCoverage = dataset.getGeospatialCoverage();
- if (geoCoverage != null) {
- LatLonRect bbox = geoCoverage.getBoundingBox();
- if (bbox != null) {
- this.addIfNotNull(met, "SouthwestBC", bbox.getLowerLeftPoint()
- .toString());
- this.addIfNotNull(met, "NorthwestBC", bbox.getUpperLeftPoint()
- .toString());
- this.addIfNotNull(met, "NortheastBC", bbox.getUpperRightPoint()
- .toString());
- this.addIfNotNull(met, "SoutheastBC", bbox.getLowerRightPoint()
- .toString());
- } else {
- // try north south, east west
- if (geoCoverage.getNorthSouthRange() != null) {
- Range nsRange = geoCoverage.getNorthSouthRange();
- this.addIfNotNull(met, "NorthSouthRangeStart", String.valueOf(nsRange
- .getStart()));
- this.addIfNotNull(met, "NorthSouthRangeResolution", String
- .valueOf(nsRange.getResolution()));
- this.addIfNotNull(met, "NorthSouthRangeSize", String.valueOf(nsRange
- .getSize()));
- this.addIfNotNull(met, "NorthSouthRangeUnits", nsRange.getUnits());
- }
-
- if (geoCoverage.getEastWestRange() != null) {
- Range nsRange = geoCoverage.getEastWestRange();
- this.addIfNotNull(met, "EastWestRangeStart", String.valueOf(nsRange
- .getStart()));
- this.addIfNotNull(met, "EastWestRangeResolution", String
- .valueOf(nsRange.getResolution()));
- this.addIfNotNull(met, "EastWestRangeSize", String.valueOf(nsRange
- .getSize()));
- this.addIfNotNull(met, "EastWestRangeUnits", nsRange.getUnits());
- }
- }
-
- this.addIfNotNull(met, "GeospatialCoverageLatitudeResolution", String
- .valueOf(dataset.getGeospatialCoverage().getLatResolution()));
- this.addIfNotNull(met, "GeospatialCoverageLongitudeResolution", String
- .valueOf(dataset.getGeospatialCoverage().getLonResolution()));
-
- if(dataset.getGeospatialCoverage().getNames() != null){
- for(Vocab gName: dataset.getGeospatialCoverage().getNames()){
- this.addIfNotNull(met, "GeospatialCoverage", gName.getText());
- }
- }
-
- }
-
- this.addIfNotNull(met, "History", dataset.getHistory());
- this.addIfNotNull(met, "ID", dataset.getID());
- if (dataset.getKeywords() != null) {
- for (Vocab vocab : dataset.getKeywords()) {
- this.addIfNotNull(met, "Keywords", vocab.getText());
- }
- }
- this.addIfNotNull(met, "Name", dataset.getName());
- this.addIfNotNull(met, "Processing", dataset.getProcessing());
- if (dataset.getProjects() != null) {
- for (Vocab vocab : dataset.getProjects()) {
- this.addIfNotNull(met, "Projects", vocab.getText());
- }
- }
-
- if (dataset.getProperties() != null) {
- for (InvProperty prop : dataset.getProperties()) {
- this.addIfNotNull(met, prop.getName(), prop.getValue());
- }
- }
-
- if (dataset.getPublishers() != null) {
- for (Source source : dataset.getPublishers()) {
- this.addIfNotNull(met, "Publishers", source.getName());
- }
- }
-
- this.addIfNotNull(met, "RestrictAccess", dataset.getRestrictAccess());
- this.addIfNotNull(met, "Rights", dataset.getRights());
- this.addIfNotNull(met, "Summary", dataset.getSummary());
- if (dataset.getTimeCoverage() != null) {
- String startDateTimeStr = null, endDateTimeStr = null;
- try {
- startDateTimeStr = toISO8601(dataset.getTimeCoverage()
- .getStart().getDate());
- endDateTimeStr = toISO8601(dataset.getTimeCoverage()
- .getEnd().getDate());
- } catch (Exception e) {
- LOG.log(Level.WARNING,
- "Error converting start/end date time strings: Message: "
- + e.getMessage());
- }
-
- this.addIfNotNull(met, "StartDateTime", startDateTimeStr);
- this.addIfNotNull(met, "EndDateTime", endDateTimeStr);
- }
-
- if (dataset.getTimeCoverage() != null &&
dataset.getTimeCoverage().getResolution() != null) {
- this.addIfNotNull(met, "TimeCoverageResolution", dataset
- .getTimeCoverage().getResolution().getText());
- }
- // dataset unique ID
- if (StringUtils.hasText(dataset.getUniqueID()) &&
!dataset.getUniqueID().equalsIgnoreCase("null")) {
- // note: globally unique ID, or string "null" if missing authority or ID
- this.addIfNotNull(met, "UniqueID", dataset.getUniqueID());
- } else {
- // dataset ID is typically not null
- this.addIfNotNull(met, "UniqueID", dataset.getID());
- }
-
- if (dataset.getVariables() != null) {
- for (Variables vars : dataset.getVariables()) {
- if (vars.getVariableList() != null) {
- for (Variable var : vars.getVariableList()) {
- this.addIfNotNull(met, "Variables", var.getName());
- }
- }
- }
- }
- return met;
- }
-
- private void addIfNotNull(Metadata met, String field, String value) {
- if (value != null && !value.equals("")) {
- met.addMetadata(field, value);
- }
- }
-
- // inspired from ASLv2 code at:
- // http://www.java2s.com/Code/Java/Data-Type/ISO8601dateparsingutility.htm
- private String toISO8601(Date date) {
- SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
- TimeZone tz = TimeZone.getTimeZone("UTC");
- df.setTimeZone(tz);
- String output = df.format(date);
- return output;
- }
-
}
Modified:
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java?rev=1231816&r1=1231815&r2=1231816&view=diff
==============================================================================
---
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java
(original)
+++
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java
Mon Jan 16 03:26:27 2012
@@ -27,14 +27,16 @@ import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-//OPeNDAP/THREDDS imports
import opendap.dap.DConnect;
-//APACHE imports
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.opendapps.config.DapRoot;
import org.apache.oodt.opendapps.config.OpendapConfig;
import org.apache.oodt.opendapps.config.OpendapConfigReader;
+import org.apache.oodt.opendapps.extractors.DasMetadataExtractor;
+import org.apache.oodt.opendapps.extractors.MetadataExtractor;
+import org.apache.oodt.opendapps.extractors.NcmlMetadataExtractor;
+import org.apache.oodt.opendapps.extractors.ThreddsMetadataExtractor;
import org.apache.oodt.opendapps.util.ProfileUtils;
import org.apache.oodt.profile.Profile;
import org.apache.oodt.profile.ProfileException;
@@ -60,6 +62,9 @@ public class OpendapProfileHandler imple
public OpendapProfileHandler(){
}
+ /**
+ * Implementation of interface method
+ */
public List<Profile> findProfiles(XMLQuery xmlQuery) throws ProfileException
{
String configFileLoc = null;
String q = xmlQuery.getKwdQueryString();
@@ -89,6 +94,7 @@ public class OpendapProfileHandler imple
List<Profile> profiles = new Vector<Profile>();
List<DapRoot> roots = this.conf.getRoots();
+ // loop over THREDDS catalogs
for (DapRoot root : roots) {
LOG.log(Level.INFO,"Parsing DapRoot="+root.getDatasetUrl());
@@ -113,15 +119,39 @@ public class OpendapProfileHandler imple
+ "]: Message: " + e.getMessage());
}
- Metadata datasetMet = d.getDatasetMet(opendapUrl);
+ // retrieve already extracted THREDDS metadata
+ Metadata datasetMet = d.getDatasetMet(opendapUrl);
+
+ // extract DAS metadata
+ MetadataExtractor dasExtractor = new DasMetadataExtractor(dConn);
+ dasExtractor.extract(datasetMet);
+
+ // extract NcML metadata, if available
+ if
(datasetMet.containsKey(ThreddsMetadataExtractor.SERVICE_TYPE_NCML)) {
+ // retrieve URL of NcML document, previously stored
+ final String ncmlUrl =
datasetMet.getMetadata(ThreddsMetadataExtractor.SERVICE_TYPE_NCML);
+ MetadataExtractor ncmlExtractor = new
NcmlMetadataExtractor(ncmlUrl);
+ ncmlExtractor.extract(datasetMet);
+ }
+
+ // debug: write out all metadata entries
+ for (String key : datasetMet.getAllKeys()) {
+ LOG.log(Level.FINE, "Metadata key="+key+"
value="+datasetMet.getMetadata(key));
+ }
+
+ // <resAttributes>
profile.setResourceAttributes(ProfileUtils.getResourceAttributes(
this.conf, opendapUrl, dConn, datasetMet));
+ // <profAttributes>
profile.setProfileAttributes(ProfileUtils
.getProfileAttributes(this.conf, datasetMet));
+ // <profElement>
profile.getProfileElements().putAll(
ProfileUtils.getProfileElements(this.conf, dConn, datasetMet,
profile));
profiles.add(profile);
LOG.log(Level.INFO, "Added profile
id="+profile.getProfileAttributes().getID());
+
+
} catch(Exception e) {
// in case of exception, don't harvest this dataset, but keep
going
LOG.log(Level.WARNING,"Error while building profile for
opendapurl="+opendapUrl);
Added:
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java?rev=1231816&view=auto
==============================================================================
---
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java
(added)
+++
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java
Mon Jan 16 03:26:27 2012
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.opendapps.extractors;
+
+//JDK imports
+import java.util.Enumeration;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+//OPENDAP imports
+import opendap.dap.Attribute;
+import opendap.dap.AttributeTable;
+import opendap.dap.DAS;
+import opendap.dap.DConnect;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.opendapps.util.ProfileUtils;
+
+/**
+ * Implementation of {@link MetadataExtractor} to extract metadata from an
+ * OpenDAP DAS source. Currently this class only extracts metadata from the
+ * NetCDF global attributes of type String, disregarding all others.
+ *
+ * <p>
+ * The DAS is requested from the {@link DConnect} supplied at construction
+ * time on every call to {@link #extract(Metadata)}.
+ *
+ * @author Luca Cinquini
+ *
+ */
+public class DasMetadataExtractor implements MetadataExtractor {
+
+ // name of the DAS attribute table that holds the NetCDF global attributes (matched exactly)
+ public static final String NC_GLOBAL = "NC_GLOBAL";
+
+ // type codes; only STRING_TYPE is compared with Attribute.getType() here -- NOTE(review): presumably these mirror the opendap.dap.Attribute constants; confirm
+ public static final int INT32_TYPE = 6;
+ public static final int INT64_TYPE = 7;
+ public static final int FLOAT32_TYPE = 8;
+ public static final int FLOAT64_TYPE = 9;
+ public static final int STRING_TYPE = 10;
+
+ private static Logger LOG = Logger.getLogger(DasMetadataExtractor.class
+ .getName());
+
+ /**
+ * The OpenDAP connection from which the DAS (the metadata source) is
+ * retrieved.
+ */
+ private final DConnect dConn;
+
+ public DasMetadataExtractor(DConnect dConn) {
+ this.dConn = dConn;
+ }
+
+ /**
+ * The main metadata extraction method. Retrieves the DAS from the
+ * connection, walks the attribute table of every name it lists, and hands
+ * the values of the String-typed attributes found in the
+ * {@code NC_GLOBAL} table to {@link ProfileUtils#addIfNotExisting}. All
+ * other attributes are only logged.
+ *
+ * <p>
+ * Any exception raised while retrieving or traversing the DAS is caught and
+ * logged at WARNING level (message only). Because the catch block encloses
+ * both loops, the first failure ends the traversal and the attributes not
+ * yet visited are skipped. Note that the FINER log statement builds its
+ * message eagerly, so {@code att.getValueAt(0)} is evaluated for every
+ * attribute regardless of the active log level.
+ *
+ * @param metadata
+ * : the metadata target, specifically the CAS metadata container.
+ */
+ public void extract(Metadata metadata) {
+
+ LOG.log(Level.INFO, "Parsing DAS metadata from: " + dConn.URL());
+
+ try {
+ DAS das = dConn.getDAS();
+ @SuppressWarnings("unchecked")
+ Enumeration<String> names = das.getNames();
+ while (names.hasMoreElements()) {
+ String attName = (String) names.nextElement();
+ LOG.log(Level.FINE, "Extracting DAS attribute: " + attName);
+
+ AttributeTable at = das.getAttributeTable(attName);
+ Enumeration e = at.getNames();
+ while (e.hasMoreElements()) {
+ String key = (String) e.nextElement();
+ Attribute att = at.getAttribute(key);
+ LOG.log(Level.FINER,
+ "\t" + att.getName() + " value=" + att.getValueAt(0) + "type="
+ + att.getType());
+
+ // only String-typed attributes of the NC_GLOBAL table are stored
+ if (attName.equals(NC_GLOBAL)) {
+ if (att.getType() == STRING_TYPE) {
+ ProfileUtils.addIfNotExisting(metadata, key, att.getValues());
+ }
+ }
+ }
+
+ }
+ } catch (Exception e) {
+ LOG.log(Level.WARNING, "Error parsing DAS metadata: " + e.getMessage());
+ }
+
+ }
+
+}
Added:
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java?rev=1231816&view=auto
==============================================================================
---
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java
(added)
+++
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java
Mon Jan 16 03:26:27 2012
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.opendapps.extractors;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+
+/**
+ * Interface for extracting metadata from a generic web accessible resource
+ * into a CAS metadata container. Each implementation class must be
+ * responsible for instantiating and accessing the specific metadata source
+ * as appropriate.
+ *
+ * @author Luca Cinquini
+ *
+ */
+public interface MetadataExtractor {
+
+ /**
+ * Method to (further) populate the metadata container. Any extracted
+ * metadata is added to the current metadata content.
+ *
+ * @param metadata
+ *          the CAS metadata container that receives the extracted entries
+ */
+ void extract(Metadata metadata);
+
+}
Added:
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java?rev=1231816&view=auto
==============================================================================
---
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java
(added)
+++
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java
Mon Jan 16 03:26:27 2012
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.opendapps.extractors;
+
+//JDK imports
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+
+/**
+ * Implementation of {@link MetadataExtractor} that parses an NcML XML
+ * document. Currently this class is simply a stub that doesn't do anything
+ * beyond logging the URL of the NcML document it was given.
+ *
+ * @author Luca Cinquini
+ *
+ */
+public class NcmlMetadataExtractor implements MetadataExtractor {
+
+ private final String ncmlUrl;
+
+ private static Logger LOG = Logger.getLogger(NcmlMetadataExtractor.class
+ .getName());
+
+ public NcmlMetadataExtractor(String ncmlUrl) {
+ this.ncmlUrl = ncmlUrl;
+ }
+
+ /**
+ * Stub implementation of interface method: logs the NcML URL at INFO level
+ * and leaves the supplied metadata container untouched.
+ *
+ * @param metadata
+ *          the CAS metadata container; not read or modified by this stub
+ */
+ public void extract(Metadata metadata) {
+
+ LOG.log(Level.INFO, "Parsing NcML metadata from: " + ncmlUrl);
+
+ }
+
+}
Added:
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java?rev=1231816&view=auto
==============================================================================
---
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java
(added)
+++
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java
Mon Jan 16 03:26:27 2012
@@ -0,0 +1,268 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.opendapps.extractors;
+
+//JDK imports
+import java.util.UUID;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.opendapps.util.ProfileUtils;
+
+//Spring imports
+import org.springframework.util.StringUtils;
+
+//THREDDS imports
+import thredds.catalog.InvAccess;
+import thredds.catalog.InvDataset;
+import thredds.catalog.InvDocumentation;
+import thredds.catalog.InvProperty;
+import thredds.catalog.ThreddsMetadata.Contributor;
+import thredds.catalog.ThreddsMetadata.GeospatialCoverage;
+import thredds.catalog.ThreddsMetadata.Range;
+import thredds.catalog.ThreddsMetadata.Source;
+import thredds.catalog.ThreddsMetadata.Variable;
+import thredds.catalog.ThreddsMetadata.Variables;
+import thredds.catalog.ThreddsMetadata.Vocab;
+import ucar.nc2.units.DateType;
+import ucar.unidata.geoloc.LatLonRect;
+
+/**
+ * Implementation of {@link MetadataExtractor} that extracts metadata from a
+ * Thredds dataset.
+ *
+ * <p>
+ * {@link #extract(Metadata)} copies the descriptive fields of the wrapped
+ * {@link InvDataset} (authority, names, contributors, creators, dates,
+ * documentation, geospatial and time coverage, keywords, projects,
+ * properties, publishers, access restrictions, variables) into the metadata
+ * container through {@link ProfileUtils#addIfNotNull}. Date conversion
+ * failures are logged at WARNING level and the resulting null value is passed
+ * on to that helper. In addition, every call stores a freshly generated
+ * random "UUID" entry, and one entry per access service whose key is the
+ * service type name and whose value is the access URL.
+ *
+ * @author Luca Cinquini
+ *
+ */
+public class ThreddsMetadataExtractor implements MetadataExtractor {
+
+ private static Logger LOG = Logger.getLogger(ThreddsMetadataExtractor.class
+ .getName());
+
+ // metadata key for the NcML access service; constant missing for 4.2 version of NetCDF library
+ public final static String SERVICE_TYPE_NCML = "NCML";
+
+ /**
+ * The source of metadata to be extracted.
+ */
+ private final InvDataset dataset;
+
+ public ThreddsMetadataExtractor(final InvDataset dataset) {
+ this.dataset = dataset;
+ }
+
+ public void extract(Metadata met) {
+
+ LOG.log(Level.INFO, "Crawling catalog URL=" + dataset.getCatalogUrl()
+ + " dataset ID=" + dataset.getID());
+
+ ProfileUtils.addIfNotNull(met, "Authority", dataset.getAuthority());
+ ProfileUtils.addIfNotNull(met, "CatalogUrl", dataset.getCatalogUrl());
+ ProfileUtils.addIfNotNull(met, "DatasetFullName", dataset.getFullName());
+ if (dataset.getContributors() != null) {
+ for (Contributor contributor : dataset.getContributors()) {
+ ProfileUtils.addIfNotNull(met, "Contributor", contributor.getName());
+ }
+ }
+
+ if (dataset.getCreators() != null) {
+ for (Source source : dataset.getCreators()) {
+ ProfileUtils.addIfNotNull(met, "Creator", source.getName());
+ }
+ }
+
+ if (dataset.getDataFormatType() != null) {
+ ProfileUtils.addIfNotNull(met, "DataFormatType", dataset
+ .getDataFormatType().toString());
+ }
+
+ if (dataset.getDataType() != null) {
+ ProfileUtils.addIfNotNull(met, "DataType", dataset.getDataType()
+ .toString());
+ }
+
+ if (dataset.getDates() != null) {
+ for (DateType dateType : dataset.getDates()) {
+ String dateString = null;
+ try {
+ dateString = ProfileUtils.toISO8601(dateType.getDate());
+ } catch (Exception e) {
+ LOG.log(Level.WARNING,
+ "Error converting date: [" + dateType.getDate() + "]: Message: "
+ + e.getMessage());
+ }
+ ProfileUtils.addIfNotNull(met, "Dates", dateString);
+ }
+ }
+
+ if (dataset.getDocumentation() != null) {
+ for (InvDocumentation doc : dataset.getDocumentation()) {
+ ProfileUtils.addIfNotNull(met, "Documentation",
doc.getInlineContent());
+ }
+ }
+
+ ProfileUtils.addIfNotNull(met, "FullName", dataset.getFullName());
+ GeospatialCoverage geoCoverage = dataset.getGeospatialCoverage();
+ if (geoCoverage != null) {
+ LatLonRect bbox = geoCoverage.getBoundingBox();
+ if (bbox != null) {
+ ProfileUtils.addIfNotNull(met, "SouthwestBC", bbox.getLowerLeftPoint()
+ .toString());
+ ProfileUtils.addIfNotNull(met, "NorthwestBC", bbox.getUpperLeftPoint()
+ .toString());
+ ProfileUtils.addIfNotNull(met, "NortheastBC", bbox.getUpperRightPoint()
+ .toString());
+ ProfileUtils.addIfNotNull(met, "SoutheastBC", bbox.getLowerRightPoint()
+ .toString());
+ } else {
+ // no bounding box: fall back to the north-south and east-west ranges
+ if (geoCoverage.getNorthSouthRange() != null) {
+ Range nsRange = geoCoverage.getNorthSouthRange();
+ ProfileUtils.addIfNotNull(met, "NorthSouthRangeStart",
+ String.valueOf(nsRange.getStart()));
+ ProfileUtils.addIfNotNull(met, "NorthSouthRangeResolution",
+ String.valueOf(nsRange.getResolution()));
+ ProfileUtils.addIfNotNull(met, "NorthSouthRangeSize",
+ String.valueOf(nsRange.getSize()));
+ ProfileUtils.addIfNotNull(met, "NorthSouthRangeUnits",
+ nsRange.getUnits());
+ }
+
+ if (geoCoverage.getEastWestRange() != null) {
+ Range nsRange = geoCoverage.getEastWestRange();
+ ProfileUtils.addIfNotNull(met, "EastWestRangeStart",
+ String.valueOf(nsRange.getStart()));
+ ProfileUtils.addIfNotNull(met, "EastWestRangeResolution",
+ String.valueOf(nsRange.getResolution()));
+ ProfileUtils.addIfNotNull(met, "EastWestRangeSize",
+ String.valueOf(nsRange.getSize()));
+ ProfileUtils.addIfNotNull(met, "EastWestRangeUnits",
+ nsRange.getUnits());
+ }
+ }
+
+ ProfileUtils.addIfNotNull(met, "GeospatialCoverageLatitudeResolution",
+ String.valueOf(dataset.getGeospatialCoverage().getLatResolution()));
+ ProfileUtils.addIfNotNull(met, "GeospatialCoverageLongitudeResolution",
+ String.valueOf(dataset.getGeospatialCoverage().getLonResolution()));
+
+ if (dataset.getGeospatialCoverage().getNames() != null) {
+ for (Vocab gName : dataset.getGeospatialCoverage().getNames()) {
+ ProfileUtils.addIfNotNull(met, "GeospatialCoverage",
gName.getText());
+ }
+ }
+
+ }
+
+ ProfileUtils.addIfNotNull(met, "History", dataset.getHistory());
+ ProfileUtils.addIfNotNull(met, "ID", dataset.getID());
+ if (dataset.getKeywords() != null) {
+ for (Vocab vocab : dataset.getKeywords()) {
+ ProfileUtils.addIfNotNull(met, "Keywords", vocab.getText());
+ }
+ }
+ ProfileUtils.addIfNotNull(met, "Name", dataset.getName());
+ ProfileUtils.addIfNotNull(met, "Processing", dataset.getProcessing());
+ if (dataset.getProjects() != null) {
+ for (Vocab vocab : dataset.getProjects()) {
+ ProfileUtils.addIfNotNull(met, "Projects", vocab.getText());
+ }
+ }
+
+ if (dataset.getProperties() != null) {
+ for (InvProperty prop : dataset.getProperties()) {
+ ProfileUtils.addIfNotNull(met, prop.getName(), prop.getValue());
+ }
+ }
+
+ if (dataset.getPublishers() != null) {
+ for (Source source : dataset.getPublishers()) {
+ ProfileUtils.addIfNotNull(met, "Publishers", source.getName());
+ }
+ }
+
+ ProfileUtils.addIfNotNull(met, "RestrictAccess",
+ dataset.getRestrictAccess());
+ ProfileUtils.addIfNotNull(met, "Rights", dataset.getRights());
+ ProfileUtils.addIfNotNull(met, "Summary", dataset.getSummary());
+ if (dataset.getTimeCoverage() != null) {
+ String startDateTimeStr = null, endDateTimeStr = null;
+ try {
+ startDateTimeStr = ProfileUtils.toISO8601(dataset.getTimeCoverage()
+ .getStart().getDate());
+ endDateTimeStr = ProfileUtils.toISO8601(dataset.getTimeCoverage()
+ .getEnd().getDate());
+ } catch (Exception e) {
+ LOG.log(
+ Level.WARNING,
+ "Error converting start/end date time strings: Message: "
+ + e.getMessage());
+ }
+
+ ProfileUtils.addIfNotNull(met, "StartDateTime", startDateTimeStr);
+ ProfileUtils.addIfNotNull(met, "EndDateTime", endDateTimeStr);
+ }
+
+ if (dataset.getTimeCoverage() != null
+ && dataset.getTimeCoverage().getResolution() != null) {
+ ProfileUtils.addIfNotNull(met, "TimeCoverageResolution", dataset
+ .getTimeCoverage().getResolution().getText());
+ }
+ // dataset unique ID
+ if (StringUtils.hasText(dataset.getUniqueID())
+ && !dataset.getUniqueID().equalsIgnoreCase("null")) {
+ // note: globally unique ID, or string "null" if missing authority or ID
+ ProfileUtils.addIfNotNull(met, "UniqueID", dataset.getUniqueID());
+ } else {
+ // dataset ID is typically not null
+ ProfileUtils.addIfNotNull(met, "UniqueID", dataset.getID());
+ }
+ // generate a UUID for each dataset, to be used as profile ID
+ ProfileUtils.addIfNotNull(met, "UUID", UUID.randomUUID().toString());
+
+ if (dataset.getVariables() != null) {
+ for (Variables vars : dataset.getVariables()) {
+ if (vars.getVariableList() != null) {
+ for (Variable var : vars.getVariableList()) {
+ // store variable names
+ ProfileUtils.addIfNotNull(met, "Variables", var.getName());
+ // store variable long names
+ ProfileUtils.addIfNotNull(met, "Variable Long Names",
+ var.getDescription());
+ // store CF standard names, only when the vocabulary name starts with "CF-"
+ if (StringUtils.hasText(vars.getVocabulary())
+ && vars.getVocabulary().startsWith("CF-")) {
+ ProfileUtils.addIfNotNull(met, "CF Standard Names",
+ var.getVocabularyName());
+ }
+ }
+ }
+ }
+ }
+
+ // store access services: key is the service type name, value is the access URL
+ for (InvAccess access : dataset.getAccess()) {
+ ProfileUtils.addIfNotNull(met, access.getService().getServiceType()
+ .toString(), access.getStandardUrlName());
+ }
+
+ }
+
+}
Modified:
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java?rev=1231816&r1=1231815&r2=1231816&view=diff
==============================================================================
---
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java
(original)
+++
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java
Mon Jan 16 03:26:27 2012
@@ -18,28 +18,30 @@
package org.apache.oodt.opendapps.util;
//JDK imports
+import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.ENUM_ELEMENT_TYPE;
+import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.PROF_ATTR_SPEC_TYPE;
+import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.PROF_ELEM_SPEC_TYPE;
+import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.RANGED_ELEMENT_TYPE;
+import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.RES_ATTR_SPEC_TYPE;
+
+import java.text.SimpleDateFormat;
import java.util.Arrays;
+import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.TimeZone;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
-//OPeNDAP imports
import opendap.dap.BaseType;
import opendap.dap.DArray;
import opendap.dap.DConnect;
import opendap.dap.DDS;
import opendap.dap.DGrid;
-//OODT imports
-import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.ENUM_ELEMENT_TYPE;
-import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.PROF_ATTR_SPEC_TYPE;
-import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.PROF_ELEM_SPEC_TYPE;
-import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.RANGED_ELEMENT_TYPE;
-import static
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.RES_ATTR_SPEC_TYPE;
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.cas.metadata.util.PathUtils;
import org.apache.oodt.opendapps.OpendapProfileElementExtractor;
@@ -52,6 +54,7 @@ import org.apache.oodt.profile.Profile;
import org.apache.oodt.profile.ProfileAttributes;
import org.apache.oodt.profile.ProfileElement;
import org.apache.oodt.profile.ResourceAttributes;
+import org.springframework.util.StringUtils;
/**
*
@@ -281,5 +284,44 @@ public class ProfileUtils {
}
return null;
}
+
+ /**
+ * Adds a (key, value) pair to the metadata container, but only when the
+ * value passes the {@code StringUtils.hasText} check; otherwise the
+ * container is left untouched.
+ *
+ * @param met the metadata container to add to
+ * @param key the metadata key under which the value is stored
+ * @param value the candidate value; skipped when it fails the hasText check
+ */
+ public static void addIfNotNull(Metadata met, String key, String value) {
+ // despite the method name, the guard is hasText(), not a plain null check
+ if (StringUtils.hasText(value)) {
+ met.addMetadata(key, value);
+ }
+ }
+
+ /**
+ * Adds the values of an enumeration to the metadata container under the
+ * given key, but only if the key has text and the container does not
+ * already contain it. Individual values that fail the
+ * {@code StringUtils.hasText} check are skipped.
+ *
+ * @param metadata the metadata container to add to
+ * @param key the metadata key; nothing is added when it has no text or is
+ * already present in the container
+ * @param values the candidate values; dereferenced without a null check
+ * once the key test has passed
+ */
+ public static void addIfNotExisting(Metadata metadata, String key,
Enumeration<String> values) {
+ // the key test runs once, before any value is added, so every value of
+ // this call goes under the same (previously absent) key
+ if (StringUtils.hasText(key) && !metadata.containsKey(key)) {
+ while (values.hasMoreElements()) {
+ String value = values.nextElement();
+ // drop values without text rather than storing them
+ if (StringUtils.hasText(value)) {
+ metadata.addMetadata(key,value);
+ }
+ }
+ }
+ }
+
+ // inspired from ASLv2 code at:
+ // http://www.java2s.com/Code/Java/Data-Type/ISO8601dateparsingutility.htm
+ /**
+ * Formats a date with the pattern {@code yyyy-MM-dd'T'HH:mm:ss'Z'},
+ * evaluated in the UTC time zone (second precision, no milliseconds).
+ *
+ * @param date the date to format
+ * @return the formatted timestamp string
+ */
+ public static String toISO8601(Date date) {
+ // a new formatter is created on every call; none is shared between calls
+ SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
+ // 'Z' in the pattern is a quoted literal, not a zone field, so the
+ // formatter must be pinned to UTC for the suffix to be truthful
+ TimeZone tz = TimeZone.getTimeZone("UTC");
+ df.setTimeZone(tz);
+ String output = df.format(date);
+ return output;
+ }
}