Author: mattmann
Date: Mon Jan 16 03:26:27 2012
New Revision: 1231816

URL: http://svn.apache.org/viewvc?rev=1231816&view=rev
Log:
- fix for OODT-368 Refactoring of metadata extraction functionality for 
opendapps module (contributed by Luca Cinquini)

Added:
    oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/
    oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java
    oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java
    oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java
    oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java
Modified:
    oodt/trunk/CHANGES.txt
    oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java
    oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java
    oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java

Modified: oodt/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/oodt/trunk/CHANGES.txt?rev=1231816&r1=1231815&r2=1231816&view=diff
==============================================================================
--- oodt/trunk/CHANGES.txt (original)
+++ oodt/trunk/CHANGES.txt Mon Jan 16 03:26:27 2012
@@ -4,6 +4,9 @@ Apache OODT Change Log
 Release 0.4: Current Development
 --------------------------------------------
 
+* OODT-368 Refactoring of metadata extraction functionality for 
+  opendapps module (Luca Cinquini, mattmann)
+
 * OODT-366 Extension to opendapps module to extract ALL variables 
   in DDS stream (Luca Cinquini, mattmann)
 

Modified: oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java
URL: http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java?rev=1231816&r1=1231815&r2=1231816&view=diff
==============================================================================
--- oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java (original)
+++ oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/DatasetCrawler.java Mon Jan 16 03:26:27 2012
@@ -19,41 +19,24 @@ package org.apache.oodt.opendapps;
 
 //JDK imports
 import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.TimeZone;
 import java.util.Vector;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-//OODT imports
 import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.opendapps.extractors.MetadataExtractor;
+import org.apache.oodt.opendapps.extractors.ThreddsMetadataExtractor;
 
-//Spring imports
-import org.springframework.util.StringUtils;
-
-//OPeNDAP/THREDDS imports
 import thredds.catalog.InvAccess;
 import thredds.catalog.InvCatalogRef;
 import thredds.catalog.InvDataset;
-import thredds.catalog.InvDocumentation;
-import thredds.catalog.InvProperty;
 import thredds.catalog.InvService;
 import thredds.catalog.ServiceType;
-import thredds.catalog.ThreddsMetadata.Contributor;
-import thredds.catalog.ThreddsMetadata.GeospatialCoverage;
-import thredds.catalog.ThreddsMetadata.Range;
-import thredds.catalog.ThreddsMetadata.Source;
-import thredds.catalog.ThreddsMetadata.Variable;
-import thredds.catalog.ThreddsMetadata.Variables;
-import thredds.catalog.ThreddsMetadata.Vocab;
 import thredds.catalog.crawl.CatalogCrawler;
-import ucar.nc2.units.DateType;
-import ucar.unidata.geoloc.LatLonRect;
 
 /**
  * Crawls a catalog and returns all the datasets and their references.
@@ -107,7 +90,12 @@ public class DatasetCrawler implements C
         if (service.getServiceType()==ServiceType.OPENDAP) {
           LOG.log(Level.INFO, "Found OpenDAP access URL: "+ single.getUrlPath());
           String opendapurl = this.datasetURL + single.getUrlPath();
-          this.datasetMet.put(opendapurl, this.extractDatasetMet(dd));
+          // extract metadata from THREDDS catalog
+          MetadataExtractor extractor = new ThreddsMetadataExtractor(dd);
+          Metadata met = new Metadata();
+          extractor.extract(met);
+          // index metadata by opendap access URL
+          this.datasetMet.put(opendapurl, met);
           this.urls.add(opendapurl);
           break;
         }
@@ -134,191 +122,4 @@ public class DatasetCrawler implements C
     return this.datasetMet;
   }
 
-  private Metadata extractDatasetMet(InvDataset dataset) {
-       
-       LOG.log(Level.INFO, "Crawling catalog URL=" + dataset.getCatalogUrl()+" dataset ID="+dataset.getID());
-       
-    Metadata met = new Metadata();
-    this.addIfNotNull(met, "Authority", dataset.getAuthority());
-    this.addIfNotNull(met, "CatalogUrl", dataset.getCatalogUrl());
-    this.addIfNotNull(met, "DatasetFullName", dataset.getFullName());
-    if (dataset.getContributors() != null) {
-      for (Contributor contributor : dataset.getContributors()) {
-        this.addIfNotNull(met, "Contributor", contributor.getName());
-      }
-    }
-
-    if (dataset.getCreators() != null) {
-      for (Source source : dataset.getCreators()) {
-        this.addIfNotNull(met, "Creator", source.getName());
-      }
-    }
-
-    if (dataset.getDataFormatType() != null){
-       this.addIfNotNull(met, "DataFormatType", dataset.getDataFormatType()
-                       .toString());
-    }
-    
-    if (dataset.getDataType() != null){
-       this.addIfNotNull(met, "DataType", dataset.getDataType().toString());
-    }
-    
-    if (dataset.getDates() != null) {
-      for (DateType dateType : dataset.getDates()) {
-        String dateString = null;
-        try {
-          dateString = toISO8601(dateType.getDate());
-        } catch (Exception e) {
-          LOG.log(Level.WARNING, "Error converting date: ["
-              + dateType.getDate() + "]: Message: " + e.getMessage());
-        }
-        this.addIfNotNull(met, "Dates", dateString);
-      }
-    }
-
-    if (dataset.getDocumentation() != null) {
-      for (InvDocumentation doc : dataset.getDocumentation()) {
-        this.addIfNotNull(met, "Documentation", doc.getInlineContent());
-      }
-    }
-
-    this.addIfNotNull(met, "FullName", dataset.getFullName());
-    GeospatialCoverage geoCoverage = dataset.getGeospatialCoverage();
-    if (geoCoverage != null) {
-      LatLonRect bbox = geoCoverage.getBoundingBox();
-      if (bbox != null) {
-        this.addIfNotNull(met, "SouthwestBC", bbox.getLowerLeftPoint()
-            .toString());
-        this.addIfNotNull(met, "NorthwestBC", bbox.getUpperLeftPoint()
-            .toString());
-        this.addIfNotNull(met, "NortheastBC", bbox.getUpperRightPoint()
-            .toString());
-        this.addIfNotNull(met, "SoutheastBC", bbox.getLowerRightPoint()
-            .toString());
-      } else {
-        // try north south, east west
-        if (geoCoverage.getNorthSouthRange() != null) {
-          Range nsRange = geoCoverage.getNorthSouthRange();
-          this.addIfNotNull(met, "NorthSouthRangeStart", String.valueOf(nsRange
-              .getStart()));
-          this.addIfNotNull(met, "NorthSouthRangeResolution", String
-              .valueOf(nsRange.getResolution()));
-          this.addIfNotNull(met, "NorthSouthRangeSize", String.valueOf(nsRange
-              .getSize()));
-          this.addIfNotNull(met, "NorthSouthRangeUnits", nsRange.getUnits());
-        }
-
-        if (geoCoverage.getEastWestRange() != null) {
-          Range nsRange = geoCoverage.getEastWestRange();
-          this.addIfNotNull(met, "EastWestRangeStart", String.valueOf(nsRange
-              .getStart()));
-          this.addIfNotNull(met, "EastWestRangeResolution", String
-              .valueOf(nsRange.getResolution()));
-          this.addIfNotNull(met, "EastWestRangeSize", String.valueOf(nsRange
-              .getSize()));
-          this.addIfNotNull(met, "EastWestRangeUnits", nsRange.getUnits());
-        }
-      }
-
-      this.addIfNotNull(met, "GeospatialCoverageLatitudeResolution", String
-          .valueOf(dataset.getGeospatialCoverage().getLatResolution()));
-      this.addIfNotNull(met, "GeospatialCoverageLongitudeResolution", String
-          .valueOf(dataset.getGeospatialCoverage().getLonResolution()));
-      
-      if(dataset.getGeospatialCoverage().getNames() != null){
-        for(Vocab gName: dataset.getGeospatialCoverage().getNames()){
-           this.addIfNotNull(met, "GeospatialCoverage", gName.getText());
-        }
-      }
-      
-    }
-
-    this.addIfNotNull(met, "History", dataset.getHistory());
-    this.addIfNotNull(met, "ID", dataset.getID());
-    if (dataset.getKeywords() != null) {
-      for (Vocab vocab : dataset.getKeywords()) {
-        this.addIfNotNull(met, "Keywords", vocab.getText());
-      }
-    }
-    this.addIfNotNull(met, "Name", dataset.getName());
-    this.addIfNotNull(met, "Processing", dataset.getProcessing());
-    if (dataset.getProjects() != null) {
-      for (Vocab vocab : dataset.getProjects()) {
-        this.addIfNotNull(met, "Projects", vocab.getText());
-      }
-    }
-
-    if (dataset.getProperties() != null) {
-      for (InvProperty prop : dataset.getProperties()) {
-        this.addIfNotNull(met, prop.getName(), prop.getValue());
-      }
-    }
-
-    if (dataset.getPublishers() != null) {
-      for (Source source : dataset.getPublishers()) {
-        this.addIfNotNull(met, "Publishers", source.getName());
-      }
-    }
-
-    this.addIfNotNull(met, "RestrictAccess", dataset.getRestrictAccess());
-    this.addIfNotNull(met, "Rights", dataset.getRights());
-    this.addIfNotNull(met, "Summary", dataset.getSummary());
-    if (dataset.getTimeCoverage() != null) {
-      String startDateTimeStr = null, endDateTimeStr = null;
-      try {
-        startDateTimeStr = toISO8601(dataset.getTimeCoverage()
-            .getStart().getDate());
-        endDateTimeStr = toISO8601(dataset.getTimeCoverage()
-            .getEnd().getDate());
-      } catch (Exception e) {
-        LOG.log(Level.WARNING,
-            "Error converting start/end date time strings: Message: "
-                + e.getMessage());
-      }
-
-      this.addIfNotNull(met, "StartDateTime", startDateTimeStr);
-      this.addIfNotNull(met, "EndDateTime", endDateTimeStr);
-    }
-
-    if (dataset.getTimeCoverage() != null && dataset.getTimeCoverage().getResolution() != null) {
-      this.addIfNotNull(met, "TimeCoverageResolution", dataset
-          .getTimeCoverage().getResolution().getText());
-    }
-    // dataset unique ID
-    if (StringUtils.hasText(dataset.getUniqueID()) && !dataset.getUniqueID().equalsIgnoreCase("null")) {
-       // note: globally unique ID, or string "null" if missing authority or ID
-       this.addIfNotNull(met, "UniqueID", dataset.getUniqueID());
-    } else {
-       // dataset ID is typically not null
-       this.addIfNotNull(met, "UniqueID", dataset.getID());
-    }
-
-    if (dataset.getVariables() != null) {
-      for (Variables vars : dataset.getVariables()) {
-        if (vars.getVariableList() != null) {
-          for (Variable var : vars.getVariableList()) {
-            this.addIfNotNull(met, "Variables", var.getName());
-          }
-        }
-      }
-    }
-    return met;
-  }
-
-  private void addIfNotNull(Metadata met, String field, String value) {
-    if (value != null && !value.equals("")) {
-      met.addMetadata(field, value);
-    }
-  }
-  
-  // inspired from ASLv2 code at:
-  // http://www.java2s.com/Code/Java/Data-Type/ISO8601dateparsingutility.htm
-  private String toISO8601(Date date) {
-    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
-    TimeZone tz = TimeZone.getTimeZone("UTC");
-    df.setTimeZone(tz);
-    String output = df.format(date);
-    return output;
-  }
-
 }

Modified: oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java
URL: http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java?rev=1231816&r1=1231815&r2=1231816&view=diff
==============================================================================
--- oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java (original)
+++ oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/OpendapProfileHandler.java Mon Jan 16 03:26:27 2012
@@ -27,14 +27,16 @@ import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-//OPeNDAP/THREDDS imports
 import opendap.dap.DConnect;
 
-//APACHE imports
 import org.apache.oodt.cas.metadata.Metadata;
 import org.apache.oodt.opendapps.config.DapRoot;
 import org.apache.oodt.opendapps.config.OpendapConfig;
 import org.apache.oodt.opendapps.config.OpendapConfigReader;
+import org.apache.oodt.opendapps.extractors.DasMetadataExtractor;
+import org.apache.oodt.opendapps.extractors.MetadataExtractor;
+import org.apache.oodt.opendapps.extractors.NcmlMetadataExtractor;
+import org.apache.oodt.opendapps.extractors.ThreddsMetadataExtractor;
 import org.apache.oodt.opendapps.util.ProfileUtils;
 import org.apache.oodt.profile.Profile;
 import org.apache.oodt.profile.ProfileException;
@@ -60,6 +62,9 @@ public class OpendapProfileHandler imple
   public OpendapProfileHandler(){
   }
 
+  /**
+   * Implementation of interface method
+   */
   public List<Profile> findProfiles(XMLQuery xmlQuery) throws ProfileException {
     String configFileLoc = null;
     String q = xmlQuery.getKwdQueryString();
@@ -89,6 +94,7 @@ public class OpendapProfileHandler imple
     List<Profile> profiles = new Vector<Profile>();
     List<DapRoot> roots = this.conf.getRoots();
          
+    // loop over THREDDS catalogs
     for (DapRoot root : roots) {
        LOG.log(Level.INFO,"Parsing DapRoot="+root.getDatasetUrl());
 
@@ -113,15 +119,39 @@ public class OpendapProfileHandler imple
                   + "]: Message: " + e.getMessage());
             }
 
-               Metadata datasetMet = d.getDatasetMet(opendapUrl);
+            // retrieve already extracted THREDDS metadata
+            Metadata datasetMet = d.getDatasetMet(opendapUrl);
+            
+            // extract DAS metadata
+            MetadataExtractor dasExtractor = new DasMetadataExtractor(dConn);
+            dasExtractor.extract(datasetMet);
+            
+            // extract NcML metadata, if available
+           if (datasetMet.containsKey(ThreddsMetadataExtractor.SERVICE_TYPE_NCML)) {
+               // retrieve URL of NcML document, previously stored
+               final String ncmlUrl = datasetMet.getMetadata(ThreddsMetadataExtractor.SERVICE_TYPE_NCML);
+               MetadataExtractor ncmlExtractor = new NcmlMetadataExtractor(ncmlUrl);
+               ncmlExtractor.extract(datasetMet);
+            }
+            
+            // debug: write out all metadata entries
+            for (String key : datasetMet.getAllKeys()) {
+                 LOG.log(Level.FINE, "Metadata key="+key+" value="+datasetMet.getMetadata(key));
+            }
+         
+            // <resAttributes>
             profile.setResourceAttributes(ProfileUtils.getResourceAttributes(
                 this.conf, opendapUrl, dConn, datasetMet));
+            // <profAttributes>
             profile.setProfileAttributes(ProfileUtils
                 .getProfileAttributes(this.conf, datasetMet));
+            // <profElement>
             profile.getProfileElements().putAll(
                 ProfileUtils.getProfileElements(this.conf, dConn, datasetMet, profile));
             profiles.add(profile);
             LOG.log(Level.INFO, "Added profile id="+profile.getProfileAttributes().getID());
+            
+            
           } catch(Exception e) {
                // in case of exception, don't harvest this dataset, but keep going
                LOG.log(Level.WARNING,"Error while building profile for opendapurl="+opendapUrl); 

Added: oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java
URL: http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java?rev=1231816&view=auto
==============================================================================
--- oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java (added)
+++ oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/DasMetadataExtractor.java Mon Jan 16 03:26:27 2012
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.opendapps.extractors;
+
+//JDK imports
+import java.util.Enumeration;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+//OPENDAP imports
+import opendap.dap.Attribute;
+import opendap.dap.AttributeTable;
+import opendap.dap.DAS;
+import opendap.dap.DConnect;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.opendapps.util.ProfileUtils;
+
+/**
+ * Implementation of {@link MetadataExtractor} to extract metadata from an
+ * OpenDAP DAS source. Currently this class only extracts metadata from the
+ * NetCDF global attributes of type String, disregarding all others.
+ * 
+ * @author Luca Cinquini
+ * 
+ */
+public class DasMetadataExtractor implements MetadataExtractor {
+
+  // prefix for all NetCDF global attributes
+  public static final String NC_GLOBAL = "NC_GLOBAL";
+
+  // NetCDF data types
+  public static final int INT32_TYPE = 6;
+  public static final int INT64_TYPE = 7;
+  public static final int FLOAT32_TYPE = 8;
+  public static final int FLOAT64_TYPE = 9;
+  public static final int STRING_TYPE = 10;
+
+  private static Logger LOG = Logger.getLogger(DasMetadataExtractor.class
+      .getName());
+
+  /**
+   * The DAS stream which is the metadata source.
+   */
+  private final DConnect dConn;
+
+  public DasMetadataExtractor(DConnect dConn) {
+    this.dConn = dConn;
+  }
+
+  /**
+   * The main metadata extraction method.
+   * 
+   * @param metadata
+   *          : the metadata target, specifically the CAS metadata container.
+   */
+  public void extract(Metadata metadata) {
+
+    LOG.log(Level.INFO, "Parsing DAS metadata from: " + dConn.URL());
+
+    try {
+      DAS das = dConn.getDAS();
+      @SuppressWarnings("unchecked")
+      Enumeration<String> names = das.getNames();
+      while (names.hasMoreElements()) {
+        String attName = (String) names.nextElement();
+        LOG.log(Level.FINE, "Extracting DAS attribute: " + attName);
+
+        AttributeTable at = das.getAttributeTable(attName);
+        Enumeration e = at.getNames();
+        while (e.hasMoreElements()) {
+          String key = (String) e.nextElement();
+          Attribute att = at.getAttribute(key);
+          LOG.log(Level.FINER,
+              "\t" + att.getName() + " value=" + att.getValueAt(0) + "type="
+                  + att.getType());
+
+          // store NetCDF global attributes
+          if (attName.equals(NC_GLOBAL)) {
+            if (att.getType() == STRING_TYPE) {
+              ProfileUtils.addIfNotExisting(metadata, key, att.getValues());
+            }
+          }
+        }
+
+      }
+    } catch (Exception e) {
+      LOG.log(Level.WARNING, "Error parsing DAS metadata: " + e.getMessage());
+    }
+
+  }
+
+}

Added: oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java
URL: http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java?rev=1231816&view=auto
==============================================================================
--- oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java (added)
+++ oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/MetadataExtractor.java Mon Jan 16 03:26:27 2012
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.opendapps.extractors;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+
+/**
+ * Interface for extracting metadata from a generic web accessible resource into
+ * a CAS metadata container. Each implementation class must be responsible for
+ * instantiating and accessing the specific metadata source as appropriate.
+ * 
+ * @author Luca Cinquini
+ * 
+ */
+public interface MetadataExtractor {
+
+  /**
+   * Method to (further) populate the metadata container. Any extracted metadata
+   * is added to the current metadata content.
+   * 
+   * @param metadata
+   */
+  void extract(Metadata metadata);
+
+}

Added: oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java
URL: http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java?rev=1231816&view=auto
==============================================================================
--- oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java (added)
+++ oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/NcmlMetadataExtractor.java Mon Jan 16 03:26:27 2012
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.opendapps.extractors;
+
+//JDK imports
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+
+/**
+ * Implementation of {@link MetadataExtractor} that parses an NcML XML document.
+ * Currently this class is simply a stub that doesn't do anything.
+ * 
+ * @author Luca Cinquini
+ * 
+ */
+public class NcmlMetadataExtractor implements MetadataExtractor {
+
+  private final String ncmlUrl;
+
+  private static Logger LOG = Logger.getLogger(NcmlMetadataExtractor.class
+      .getName());
+
+  public NcmlMetadataExtractor(String ncmlUrl) {
+    this.ncmlUrl = ncmlUrl;
+  }
+
+  /**
+   * Stub implementation of interface method.
+   */
+  public void extract(Metadata metadata) {
+
+    LOG.log(Level.INFO, "Parsing NcML metadata from: " + ncmlUrl);
+
+  }
+
+}

Added: oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java
URL: http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java?rev=1231816&view=auto
==============================================================================
--- oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java (added)
+++ oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/extractors/ThreddsMetadataExtractor.java Mon Jan 16 03:26:27 2012
@@ -0,0 +1,268 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.opendapps.extractors;
+
+//JDK imports
+import java.util.UUID;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.opendapps.util.ProfileUtils;
+
+//Spring imports
+import org.springframework.util.StringUtils;
+
+//THREDDS imports
+import thredds.catalog.InvAccess;
+import thredds.catalog.InvDataset;
+import thredds.catalog.InvDocumentation;
+import thredds.catalog.InvProperty;
+import thredds.catalog.ThreddsMetadata.Contributor;
+import thredds.catalog.ThreddsMetadata.GeospatialCoverage;
+import thredds.catalog.ThreddsMetadata.Range;
+import thredds.catalog.ThreddsMetadata.Source;
+import thredds.catalog.ThreddsMetadata.Variable;
+import thredds.catalog.ThreddsMetadata.Variables;
+import thredds.catalog.ThreddsMetadata.Vocab;
+import ucar.nc2.units.DateType;
+import ucar.unidata.geoloc.LatLonRect;
+
+/**
+ * Implementation of {@link MetadataExtractor} that extracts metadata from a
+ * Thredds dataset.
+ * 
+ * @author Luca Cinquini
+ * 
+ */
+public class ThreddsMetadataExtractor implements MetadataExtractor {
+
+  private static Logger LOG = Logger.getLogger(ThreddsMetadataExtractor.class
+      .getName());
+
+  // constant missing for 4.2 version of NetCDF library
+  public final static String SERVICE_TYPE_NCML = "NCML";
+
+  /**
+   * The source of metadata to be extracted.
+   */
+  private final InvDataset dataset;
+
+  public ThreddsMetadataExtractor(final InvDataset dataset) {
+    this.dataset = dataset;
+  }
+
+  public void extract(Metadata met) {
+
+    LOG.log(Level.INFO, "Crawling catalog URL=" + dataset.getCatalogUrl()
+        + " dataset ID=" + dataset.getID());
+
+    ProfileUtils.addIfNotNull(met, "Authority", dataset.getAuthority());
+    ProfileUtils.addIfNotNull(met, "CatalogUrl", dataset.getCatalogUrl());
+    ProfileUtils.addIfNotNull(met, "DatasetFullName", dataset.getFullName());
+    if (dataset.getContributors() != null) {
+      for (Contributor contributor : dataset.getContributors()) {
+        ProfileUtils.addIfNotNull(met, "Contributor", contributor.getName());
+      }
+    }
+
+    if (dataset.getCreators() != null) {
+      for (Source source : dataset.getCreators()) {
+        ProfileUtils.addIfNotNull(met, "Creator", source.getName());
+      }
+    }
+
+    if (dataset.getDataFormatType() != null) {
+      ProfileUtils.addIfNotNull(met, "DataFormatType", dataset
+          .getDataFormatType().toString());
+    }
+
+    if (dataset.getDataType() != null) {
+      ProfileUtils.addIfNotNull(met, "DataType", dataset.getDataType()
+          .toString());
+    }
+
+    if (dataset.getDates() != null) {
+      for (DateType dateType : dataset.getDates()) {
+        String dateString = null;
+        try {
+          dateString = ProfileUtils.toISO8601(dateType.getDate());
+        } catch (Exception e) {
+          LOG.log(Level.WARNING,
+              "Error converting date: [" + dateType.getDate() + "]: Message: "
+                  + e.getMessage());
+        }
+        ProfileUtils.addIfNotNull(met, "Dates", dateString);
+      }
+    }
+
+    if (dataset.getDocumentation() != null) {
+      for (InvDocumentation doc : dataset.getDocumentation()) {
+        ProfileUtils.addIfNotNull(met, "Documentation", doc.getInlineContent());
+      }
+    }
+
+    ProfileUtils.addIfNotNull(met, "FullName", dataset.getFullName());
+    GeospatialCoverage geoCoverage = dataset.getGeospatialCoverage();
+    if (geoCoverage != null) {
+      LatLonRect bbox = geoCoverage.getBoundingBox();
+      if (bbox != null) {
+        ProfileUtils.addIfNotNull(met, "SouthwestBC", bbox.getLowerLeftPoint()
+            .toString());
+        ProfileUtils.addIfNotNull(met, "NorthwestBC", bbox.getUpperLeftPoint()
+            .toString());
+        ProfileUtils.addIfNotNull(met, "NortheastBC", bbox.getUpperRightPoint()
+            .toString());
+        ProfileUtils.addIfNotNull(met, "SoutheastBC", bbox.getLowerRightPoint()
+            .toString());
+      } else {
+        // try north south, east west
+        if (geoCoverage.getNorthSouthRange() != null) {
+          Range nsRange = geoCoverage.getNorthSouthRange();
+          ProfileUtils.addIfNotNull(met, "NorthSouthRangeStart",
+              String.valueOf(nsRange.getStart()));
+          ProfileUtils.addIfNotNull(met, "NorthSouthRangeResolution",
+              String.valueOf(nsRange.getResolution()));
+          ProfileUtils.addIfNotNull(met, "NorthSouthRangeSize",
+              String.valueOf(nsRange.getSize()));
+          ProfileUtils.addIfNotNull(met, "NorthSouthRangeUnits",
+              nsRange.getUnits());
+        }
+
+        if (geoCoverage.getEastWestRange() != null) {
+          Range nsRange = geoCoverage.getEastWestRange();
+          ProfileUtils.addIfNotNull(met, "EastWestRangeStart",
+              String.valueOf(nsRange.getStart()));
+          ProfileUtils.addIfNotNull(met, "EastWestRangeResolution",
+              String.valueOf(nsRange.getResolution()));
+          ProfileUtils.addIfNotNull(met, "EastWestRangeSize",
+              String.valueOf(nsRange.getSize()));
+          ProfileUtils.addIfNotNull(met, "EastWestRangeUnits",
+              nsRange.getUnits());
+        }
+      }
+
+      ProfileUtils.addIfNotNull(met, "GeospatialCoverageLatitudeResolution",
+          String.valueOf(dataset.getGeospatialCoverage().getLatResolution()));
+      ProfileUtils.addIfNotNull(met, "GeospatialCoverageLongitudeResolution",
+          String.valueOf(dataset.getGeospatialCoverage().getLonResolution()));
+
+      if (dataset.getGeospatialCoverage().getNames() != null) {
+        for (Vocab gName : dataset.getGeospatialCoverage().getNames()) {
+          ProfileUtils.addIfNotNull(met, "GeospatialCoverage", gName.getText());
+        }
+      }
+
+    }
+
+    ProfileUtils.addIfNotNull(met, "History", dataset.getHistory());
+    ProfileUtils.addIfNotNull(met, "ID", dataset.getID());
+    if (dataset.getKeywords() != null) {
+      for (Vocab vocab : dataset.getKeywords()) {
+        ProfileUtils.addIfNotNull(met, "Keywords", vocab.getText());
+      }
+    }
+    ProfileUtils.addIfNotNull(met, "Name", dataset.getName());
+    ProfileUtils.addIfNotNull(met, "Processing", dataset.getProcessing());
+    if (dataset.getProjects() != null) {
+      for (Vocab vocab : dataset.getProjects()) {
+        ProfileUtils.addIfNotNull(met, "Projects", vocab.getText());
+      }
+    }
+
+    if (dataset.getProperties() != null) {
+      for (InvProperty prop : dataset.getProperties()) {
+        ProfileUtils.addIfNotNull(met, prop.getName(), prop.getValue());
+      }
+    }
+
+    if (dataset.getPublishers() != null) {
+      for (Source source : dataset.getPublishers()) {
+        ProfileUtils.addIfNotNull(met, "Publishers", source.getName());
+      }
+    }
+
+    ProfileUtils.addIfNotNull(met, "RestrictAccess",
+        dataset.getRestrictAccess());
+    ProfileUtils.addIfNotNull(met, "Rights", dataset.getRights());
+    ProfileUtils.addIfNotNull(met, "Summary", dataset.getSummary());
+    if (dataset.getTimeCoverage() != null) {
+      String startDateTimeStr = null, endDateTimeStr = null;
+      try {
+        startDateTimeStr = ProfileUtils.toISO8601(dataset.getTimeCoverage()
+            .getStart().getDate());
+        endDateTimeStr = ProfileUtils.toISO8601(dataset.getTimeCoverage()
+            .getEnd().getDate());
+      } catch (Exception e) {
+        LOG.log(
+            Level.WARNING,
+            "Error converting start/end date time strings: Message: "
+                + e.getMessage());
+      }
+
+      ProfileUtils.addIfNotNull(met, "StartDateTime", startDateTimeStr);
+      ProfileUtils.addIfNotNull(met, "EndDateTime", endDateTimeStr);
+    }
+
+    if (dataset.getTimeCoverage() != null
+        && dataset.getTimeCoverage().getResolution() != null) {
+      ProfileUtils.addIfNotNull(met, "TimeCoverageResolution", dataset
+          .getTimeCoverage().getResolution().getText());
+    }
+    // dataset unique ID
+    if (StringUtils.hasText(dataset.getUniqueID())
+        && !dataset.getUniqueID().equalsIgnoreCase("null")) {
+      // note: globally unique ID, or string "null" if missing authority or ID
+      ProfileUtils.addIfNotNull(met, "UniqueID", dataset.getUniqueID());
+    } else {
+      // dataset ID is typically not null
+      ProfileUtils.addIfNotNull(met, "UniqueID", dataset.getID());
+    }
+    // generate a UUID for each dataset, to be used as profile ID
+    ProfileUtils.addIfNotNull(met, "UUID", UUID.randomUUID().toString());
+
+    if (dataset.getVariables() != null) {
+      for (Variables vars : dataset.getVariables()) {
+        if (vars.getVariableList() != null) {
+          for (Variable var : vars.getVariableList()) {
+            // store variable names
+            ProfileUtils.addIfNotNull(met, "Variables", var.getName());
+            // store variable long names
+            ProfileUtils.addIfNotNull(met, "Variable Long Names",
+                var.getDescription());
+            // store CF standard names
+            if (StringUtils.hasText(vars.getVocabulary())
+                && vars.getVocabulary().startsWith("CF-")) {
+              ProfileUtils.addIfNotNull(met, "CF Standard Names",
+                  var.getVocabularyName());
+            }
+          }
+        }
+      }
+    }
+
+    // store access services
+    for (InvAccess access : dataset.getAccess()) {
+      ProfileUtils.addIfNotNull(met, access.getService().getServiceType()
+          .toString(), access.getStandardUrlName());
+    }
+
+  }
+
+}

Modified: 
oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java
URL: 
http://svn.apache.org/viewvc/oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java?rev=1231816&r1=1231815&r2=1231816&view=diff
==============================================================================
--- oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java (original)
+++ oodt/trunk/opendapps/src/main/java/org/apache/oodt/opendapps/util/ProfileUtils.java Mon Jan 16 03:26:27 2012
@@ -18,28 +18,30 @@
 package org.apache.oodt.opendapps.util;
 
 //JDK imports
+import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.ENUM_ELEMENT_TYPE;
+import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.PROF_ATTR_SPEC_TYPE;
+import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.PROF_ELEM_SPEC_TYPE;
+import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.RANGED_ELEMENT_TYPE;
+import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.RES_ATTR_SPEC_TYPE;
+
+import java.text.SimpleDateFormat;
 import java.util.Arrays;
+import java.util.Date;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.TimeZone;
 import java.util.UUID;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-//OPeNDAP imports
 import opendap.dap.BaseType;
 import opendap.dap.DArray;
 import opendap.dap.DConnect;
 import opendap.dap.DDS;
 import opendap.dap.DGrid;
 
-//OODT imports
-import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.ENUM_ELEMENT_TYPE;
-import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.PROF_ATTR_SPEC_TYPE;
-import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.PROF_ELEM_SPEC_TYPE;
-import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.RANGED_ELEMENT_TYPE;
-import static 
org.apache.oodt.opendapps.config.OpendapConfigMetKeys.RES_ATTR_SPEC_TYPE;
 import org.apache.oodt.cas.metadata.Metadata;
 import org.apache.oodt.cas.metadata.util.PathUtils;
 import org.apache.oodt.opendapps.OpendapProfileElementExtractor;
@@ -52,6 +54,7 @@ import org.apache.oodt.profile.Profile;
 import org.apache.oodt.profile.ProfileAttributes;
 import org.apache.oodt.profile.ProfileElement;
 import org.apache.oodt.profile.ResourceAttributes;
+import org.springframework.util.StringUtils;
 
 /**
  * 
@@ -281,5 +284,44 @@ public class ProfileUtils {
        }
        return null;
   }
+  
+  /**
+   * Adds a (key, value) pair to the metadata container, but only if the value has text.
+   * @param met the metadata container that receives the pair
+   * @param key the metadata key under which the value is added
+   * @param value the value to add; skipped when StringUtils.hasText(value) is false
+   */
+  public static void addIfNotNull(Metadata met, String key, String value) {
+       if (StringUtils.hasText(value)) {
+      met.addMetadata(key, value);
+    }
+  }
+  
+  /**
+   * Adds several values under one key to the metadata container, unless the key exists already.
+   * @param metadata the metadata container that receives the values
+   * @param key the metadata key; nothing is added when it has no text or is already present
+   * @param values the candidate values; a null enumeration or values without text are skipped
+   */
+  public static void addIfNotExisting(Metadata metadata, String key, Enumeration<String> values) {
+    if (values != null && StringUtils.hasText(key) && !metadata.containsKey(key)) {
+      while (values.hasMoreElements()) {
+        String value = values.nextElement();
+        if (StringUtils.hasText(value)) {
+          metadata.addMetadata(key, value);
+        }
+      }
+    }
+  }
+  
+  // Formats a date as a UTC ISO 8601 timestamp (yyyy-MM-dd'T'HH:mm:ss'Z'); inspired by ASLv2 code at:
+  // http://www.java2s.com/Code/Java/Data-Type/ISO8601dateparsingutility.htm
+  public static String toISO8601(Date date) {
+    // Locale.ROOT pins the Gregorian calendar and ASCII digits; with the default locale
+    // (e.g. th_TH) the year could otherwise be rendered in the Buddhist era.
+    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", java.util.Locale.ROOT);
+    df.setTimeZone(TimeZone.getTimeZone("UTC"));
+    return df.format(date);
+  }
 
 }


Reply via email to