tika-advanced-parser-m...

bob Sat, 16 Jan 2016 10:24:05 -0800

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,391 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.geoinfo;
+
+import org.apache.sis.internal.util.CheckedArrayList;
+import org.apache.sis.internal.util.CheckedHashSet;
+import org.apache.sis.metadata.iso.DefaultMetadata;
+import org.apache.sis.metadata.iso.DefaultMetadataScope;
+import org.apache.sis.metadata.iso.constraint.DefaultLegalConstraints;
+import org.apache.sis.metadata.iso.extent.DefaultGeographicBoundingBox;
+import org.apache.sis.metadata.iso.extent.DefaultGeographicDescription;
+import org.apache.sis.metadata.iso.identification.DefaultDataIdentification;
+import org.apache.sis.storage.DataStore;
+import org.apache.sis.storage.DataStoreException;
+import org.apache.sis.storage.DataStores;
+import org.apache.sis.storage.UnsupportedStorageException;
+import org.apache.sis.util.collection.CodeListSet;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.opengis.metadata.Identifier;
+import org.opengis.metadata.citation.Citation;
+import org.opengis.metadata.citation.CitationDate;
+import org.opengis.metadata.citation.OnlineResource;
+import org.opengis.metadata.citation.ResponsibleParty;
+import org.opengis.metadata.constraint.Restriction;
+import org.opengis.metadata.distribution.DigitalTransferOptions;
+import org.opengis.metadata.distribution.Distribution;
+import org.opengis.metadata.distribution.Distributor;
+import org.opengis.metadata.distribution.Format;
+import org.opengis.metadata.extent.Extent;
+import org.opengis.metadata.extent.GeographicExtent;
+import org.opengis.metadata.identification.Identification;
+import org.opengis.metadata.identification.Keywords;
+import org.opengis.metadata.identification.Progress;
+import org.opengis.metadata.identification.TopicCategory;
+import org.opengis.util.InternationalString;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.*;
+
+
+public class GeographicInformationParser extends AbstractParser{
+
+    /** MIME type string that parse() records as the document content type. */
+    public static final String geoInfoType="text/iso19139+xml";
+    /** Media types handled by this parser; returned by getSupportedTypes(). */
+    private final Set<MediaType> SUPPORTED_TYPES =
+            Collections.singleton(MediaType.text("iso19139+xml"));
+
+
+    /**
+     * Returns the media types handled by this parser.
+     *
+     * @param parseContext parse context; not consulted by this implementation
+     * @return the singleton set stored in {@code SUPPORTED_TYPES}
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
+        return SUPPORTED_TYPES;
+    }
+
+    @Override
+    public void parse(InputStream inputStream, ContentHandler contentHandler, 
Metadata metadata, ParseContext parseContext) throws IOException, SAXException, 
TikaException {
+        metadata.set(Metadata.CONTENT_TYPE,geoInfoType);
+        DataStore dataStore= null;
+        DefaultMetadata defaultMetadata=null;
+        XHTMLContentHandler xhtmlContentHandler=new 
XHTMLContentHandler(contentHandler,metadata);
+
+        try {
+            TemporaryResources tmp = new TemporaryResources();
+            TikaInputStream 
tikaInputStream=TikaInputStream.get(inputStream,tmp);
+            File file= tikaInputStream.getFile();
+            dataStore = DataStores.open(file);
+            defaultMetadata=new DefaultMetadata(dataStore.getMetadata());
+            if(defaultMetadata!=null)
+                extract(xhtmlContentHandler, metadata, defaultMetadata);
+
+        }catch (UnsupportedStorageException e) {
+            throw new TikaException("UnsupportedStorageException",e);
+        }
+        catch (DataStoreException e) {
+            throw new TikaException("DataStoreException",e);
+        }
+    }
+
+    /**
+     * Copies the SIS metadata into the Tika metadata object and then writes
+     * the XHTML content.
+     * <p>
+     * Extraction is best effort: a runtime failure in any step is reported on
+     * standard error and ends the extraction without failing the parse.
+     *
+     * @param xhtmlContentHandler receiver of the XHTML content
+     * @param metadata            Tika metadata object to populate
+     * @param defaultMetadata     SIS metadata read from the data store
+     * @throws SAXException if the content handler rejects an event
+     */
+    private void extract(XHTMLContentHandler xhtmlContentHandler,
+            Metadata metadata, DefaultMetadata defaultMetadata)
+            throws SAXException {
+        try {
+            getMetaDataCharacterSet(metadata, defaultMetadata);
+            getMetaDataContact(metadata, defaultMetadata);
+            getMetaDataIdentificationInfo(metadata, defaultMetadata);
+            getMetaDataDistributionInfo(metadata, defaultMetadata);
+            getMetaDataDateInfo(metadata, defaultMetadata);
+            getMetaDataResourceScope(metadata, defaultMetadata);
+            getMetaDataParentMetaDataTitle(metadata, defaultMetadata);
+            getMetaDataIdetifierCode(metadata, defaultMetadata);
+            getMetaDataStandard(metadata, defaultMetadata);
+            extractContent(xhtmlContentHandler, defaultMetadata);
+        } catch (SAXException e) {
+            // SAX failures originate in the caller's content handler and are
+            // part of this method's declared contract, so they must not be
+            // swallowed together with the metadata problems below.
+            throw e;
+        } catch (Exception e) {
+            // Deliberate best effort for incomplete or unexpected metadata.
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Writes the XHTML content: for every identification entry a title
+     * heading, one heading per cited responsible party, the abstract, and
+     * the four bounds of each geographic bounding box.
+     * <p>
+     * Parts that are absent from the metadata are skipped instead of raising
+     * a NullPointerException, and collections are iterated through their
+     * declared interface type rather than cast to {@code ArrayList}.
+     *
+     * @param xhtmlContentHandler receiver of the XHTML content
+     * @param defaultMetadata     SIS metadata read from the data store
+     * @throws SAXException if the content handler rejects an event
+     */
+    private void extractContent(XHTMLContentHandler xhtmlContentHandler,
+            DefaultMetadata defaultMetadata) throws SAXException {
+        xhtmlContentHandler.startDocument();
+        xhtmlContentHandler.newline();
+
+        xhtmlContentHandler.newline();
+        for (Identification i : defaultMetadata.getIdentificationInfo()) {
+            Citation citation = i.getCitation();
+            if (citation != null && citation.getTitle() != null) {
+                xhtmlContentHandler.startElement("h1");
+                xhtmlContentHandler.characters(
+                        citation.getTitle().toString());
+                xhtmlContentHandler.endElement("h1");
+                xhtmlContentHandler.newline();
+            }
+
+            if (citation != null) {
+                for (ResponsibleParty r
+                        : citation.getCitedResponsibleParties()) {
+                    xhtmlContentHandler.startElement("h3");
+                    xhtmlContentHandler.newline();
+                    if (r.getRole() != null) {
+                        xhtmlContentHandler.characters(
+                                "CitedResponsiblePartyRole "
+                                + r.getRole().toString());
+                    }
+                    if (r.getIndividualName() != null) {
+                        xhtmlContentHandler.characters(
+                                "CitedResponsiblePartyName "
+                                + r.getIndividualName().toString());
+                    }
+                    xhtmlContentHandler.endElement("h3");
+                    xhtmlContentHandler.newline();
+                }
+            }
+
+            if (i.getAbstract() != null) {
+                xhtmlContentHandler.startElement("p");
+                xhtmlContentHandler.newline();
+                xhtmlContentHandler.characters("IdentificationInfoAbstract "
+                        + i.getAbstract().toString());
+                xhtmlContentHandler.endElement("p");
+                xhtmlContentHandler.newline();
+            }
+
+            // Extents are only reachable through the SIS data identification
+            // class; other identification kinds carry none here.
+            if (!(i instanceof DefaultDataIdentification)) {
+                continue;
+            }
+            for (Extent e : ((DefaultDataIdentification) i).getExtents()) {
+                for (GeographicExtent g : e.getGeographicElements()) {
+                    if (g instanceof DefaultGeographicBoundingBox) {
+                        writeBoundingBox(xhtmlContentHandler,
+                                (DefaultGeographicBoundingBox) g);
+                    }
+                }
+            }
+        }
+        xhtmlContentHandler.newline();
+        xhtmlContentHandler.endDocument();
+    }
+
+    /**
+     * Writes the west, east, north and south bounds of a bounding box as
+     * four label/value table rows.
+     */
+    private void writeBoundingBox(XHTMLContentHandler xhtmlContentHandler,
+            DefaultGeographicBoundingBox box) throws SAXException {
+        // NOTE(review): the west/east labels say "Latitude" although the
+        // values are longitudes. The labels are emitted text that consumers
+        // may match on, so they are kept unchanged - confirm before renaming.
+        writeBoundRow(xhtmlContentHandler,
+                "GeographicElementWestBoundLatitude",
+                box.getWestBoundLongitude());
+        writeBoundRow(xhtmlContentHandler,
+                "GeographicElementEastBoundLatitude",
+                box.getEastBoundLongitude());
+        writeBoundRow(xhtmlContentHandler,
+                "GeographicElementNorthBoundLatitude",
+                box.getNorthBoundLatitude());
+        writeBoundRow(xhtmlContentHandler,
+                "GeographicElementSouthBoundLatitude",
+                box.getSouthBoundLatitude());
+    }
+
+    /** Writes one table row holding a label cell and a numeric value cell. */
+    private void writeBoundRow(XHTMLContentHandler xhtmlContentHandler,
+            String label, double value) throws SAXException {
+        xhtmlContentHandler.startElement("tr");
+        xhtmlContentHandler.startElement("td");
+        xhtmlContentHandler.characters(label);
+        xhtmlContentHandler.endElement("td");
+        xhtmlContentHandler.startElement("td");
+        xhtmlContentHandler.characters(String.valueOf(value));
+        xhtmlContentHandler.endElement("td");
+        xhtmlContentHandler.endElement("tr");
+    }
+
+    /**
+     * Adds the name of every character set declared by the metadata under
+     * the "CharacterSet" key.
+     * <p>
+     * The collection is iterated through its declared type; it is no longer
+     * cast to the SIS-internal {@code CheckedHashSet}, which would fail with
+     * a ClassCastException for any other collection implementation.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataCharacterSet(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        for (Charset c : defaultMetaData.getCharacterSets()) {
+            metadata.add("CharacterSet", c.name());
+        }
+    }
+
+
+    /**
+     * Adds the role and organisation name of every metadata contact.
+     * <p>
+     * The collection is iterated through its declared type; it is no longer
+     * cast to the SIS-internal {@code CheckedArrayList}, which would fail
+     * with a ClassCastException for any other collection implementation.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataContact(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        for (ResponsibleParty rparty : defaultMetaData.getContacts()) {
+            if (rparty.getRole() != null) {
+                metadata.add("ContactRole", rparty.getRole().name());
+            }
+            if (rparty.getOrganisationName() != null) {
+                metadata.add("ContactPartyName-",
+                        rparty.getOrganisationName().toString());
+            }
+        }
+    }
+
+    /**
+     * Adds the identification section of the metadata: citation, abstract,
+     * status, resource formats, languages, topic categories, keywords, legal
+     * constraints and geographic descriptions.
+     * <p>
+     * Every optional part is checked before it is dereferenced, collections
+     * are iterated through their declared interface types, and elements are
+     * type-checked before being narrowed to a SIS implementation class, so
+     * one incomplete entry no longer aborts the rest of the extraction.
+     * Metadata keys are unchanged.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataIdentificationInfo(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        for (Identification i : defaultMetaData.getIdentificationInfo()) {
+            // Languages, topic categories and extents are read through the
+            // SIS data identification class only.
+            DefaultDataIdentification dataIdentification =
+                    (i instanceof DefaultDataIdentification)
+                            ? (DefaultDataIdentification) i : null;
+
+            addCitationInfo(metadata, i.getCitation());
+
+            if (i.getAbstract() != null) {
+                metadata.add("IdentificationInfoAbstract ",
+                        i.getAbstract().toString());
+            }
+            for (Progress p : i.getStatus()) {
+                metadata.add("IdentificationInfoStatus ", p.name());
+            }
+            for (Format f : i.getResourceFormats()) {
+                if (f.getName() != null) {
+                    metadata.add(
+                            "ResourceFormatSpecificationAlternativeTitle ",
+                            f.getName().toString());
+                }
+            }
+            if (dataIdentification != null) {
+                for (Locale l : dataIdentification.getLanguages()) {
+                    metadata.add("IdentificationInfoLanguage-->",
+                            l.getDisplayLanguage(Locale.ENGLISH));
+                }
+                for (TopicCategory t
+                        : dataIdentification.getTopicCategories()) {
+                    metadata.add("IdentificationInfoTopicCategory-->",
+                            t.name());
+                }
+            }
+
+            addKeywordInfo(metadata, i);
+            addLegalConstraintInfo(metadata, i);
+
+            if (dataIdentification != null) {
+                addGeographicDescriptionInfo(metadata, dataIdentification);
+            }
+        }
+    }
+
+    /** Adds title, dates and cited responsible parties of a citation. */
+    private void addCitationInfo(Metadata metadata, Citation citation) {
+        if (citation == null) {
+            return;
+        }
+        if (citation.getTitle() != null) {
+            metadata.add("IdentificationInfoCitationTitle ",
+                    citation.getTitle().toString());
+        }
+        for (CitationDate d : citation.getDates()) {
+            if (d.getDateType() != null) {
+                metadata.add("CitationDate ",
+                        d.getDateType().name() + "-->" + d.getDate());
+            }
+        }
+        for (ResponsibleParty r : citation.getCitedResponsibleParties()) {
+            if (r.getRole() != null) {
+                metadata.add("CitedResponsiblePartyRole ",
+                        r.getRole().toString());
+            }
+            if (r.getIndividualName() != null) {
+                metadata.add("CitedResponsiblePartyName ",
+                        r.getIndividualName().toString());
+            }
+            if (r.getOrganisationName() != null) {
+                metadata.add("CitedResponsiblePartyOrganizationName ",
+                        r.getOrganisationName().toString());
+            }
+            if (r.getPositionName() != null) {
+                metadata.add("CitedResponsiblePartyPositionName ",
+                        r.getPositionName().toString());
+            }
+            if (r.getContactInfo() != null
+                    && r.getContactInfo().getAddress() != null) {
+                for (String s : r.getContactInfo().getAddress()
+                        .getElectronicMailAddresses()) {
+                    metadata.add("CitedResponsiblePartyEMail ", s);
+                }
+            }
+        }
+    }
+
+    /** Adds the descriptive keyword groups and their thesaurus citations. */
+    private void addKeywordInfo(Metadata metadata, Identification i) {
+        // NOTE(review): the counter is incremented before first use, so the
+        // numbered keys start at 2. Kept as is because the keys are runtime
+        // output - confirm whether numbering from 1 was intended.
+        int j = 1;
+        for (Keywords k : i.getDescriptiveKeywords()) {
+            j++;
+            for (InternationalString s : k.getKeywords()) {
+                metadata.add("Keywords " + j, s.toString());
+            }
+            if (k.getType() != null) {
+                metadata.add("KeywordsType " + j, k.getType().name());
+            }
+            Citation thesaurus = k.getThesaurusName();
+            if (thesaurus == null) {
+                continue;
+            }
+            if (thesaurus.getTitle() != null) {
+                metadata.add("ThesaurusNameTitle " + j,
+                        thesaurus.getTitle().toString());
+            }
+            if (thesaurus.getAlternateTitles() != null) {
+                metadata.add("ThesaurusNameAlternativeTitle " + j,
+                        thesaurus.getAlternateTitles().toString());
+            }
+            for (CitationDate cd : thesaurus.getDates()) {
+                if (cd.getDateType() != null) {
+                    metadata.add("ThesaurusNameDate ",
+                            cd.getDateType().name() + "-->" + cd.getDate());
+                }
+            }
+        }
+    }
+
+    /** Adds access, other and use constraints of the legal constraints. */
+    private void addLegalConstraintInfo(Metadata metadata,
+            Identification i) {
+        for (Object c : i.getResourceConstraints()) {
+            // Only legal constraints expose the lists read below; any other
+            // constraint kind is skipped rather than cast.
+            if (!(c instanceof DefaultLegalConstraints)) {
+                continue;
+            }
+            DefaultLegalConstraints legal = (DefaultLegalConstraints) c;
+            for (Restriction r : legal.getAccessConstraints()) {
+                metadata.add("AccessContraints ", r.name());
+            }
+            for (InternationalString s : legal.getOtherConstraints()) {
+                metadata.add("OtherConstraints ", s.toString());
+            }
+            for (Restriction r : legal.getUseConstraints()) {
+                metadata.add("UserConstraints ", r.name());
+            }
+        }
+    }
+
+    /** Adds identifier code and authority of each geographic description. */
+    private void addGeographicDescriptionInfo(Metadata metadata,
+            DefaultDataIdentification dataIdentification) {
+        for (Extent e : dataIdentification.getExtents()) {
+            for (GeographicExtent g : e.getGeographicElements()) {
+                if (!(g instanceof DefaultGeographicDescription)) {
+                    continue;
+                }
+                Identifier identifier = ((DefaultGeographicDescription) g)
+                        .getGeographicIdentifier();
+                if (identifier == null) {
+                    continue;
+                }
+                if (identifier.getCode() != null) {
+                    metadata.add("GeographicIdentifierCode ",
+                            identifier.getCode());
+                }
+                Citation authority = identifier.getAuthority();
+                if (authority == null) {
+                    continue;
+                }
+                if (authority.getTitle() != null) {
+                    metadata.add("GeographicIdentifierAuthorityTitle ",
+                            authority.getTitle().toString());
+                }
+                for (InternationalString s
+                        : authority.getAlternateTitles()) {
+                    metadata.add(
+                            "GeographicIdentifierAuthorityAlternativeTitle ",
+                            s.toString());
+                }
+                for (CitationDate cd : authority.getDates()) {
+                    if (cd.getDateType() != null && cd.getDate() != null) {
+                        metadata.add("GeographicIdentifierAuthorityDate ",
+                                cd.getDateType().name() + " "
+                                + cd.getDate().toString());
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Adds the distribution section of the metadata: distribution formats,
+     * distributor contacts and the online resources of the transfer options.
+     * <p>
+     * Returns without adding anything when the metadata has no distribution
+     * information; previously that case raised a NullPointerException, which
+     * ended the whole extraction before any content was written.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataDistributionInfo(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        Distribution distribution = defaultMetaData.getDistributionInfo();
+        if (distribution == null) {
+            return;
+        }
+        for (Format f : distribution.getDistributionFormats()) {
+            if (f.getName() != null) {
+                metadata.add(
+                        "DistributionFormatSpecificationAlternativeTitle ",
+                        f.getName().toString());
+            }
+        }
+        for (Distributor d : distribution.getDistributors()) {
+            if (d == null || d.getDistributorContact() == null) {
+                continue;
+            }
+            if (d.getDistributorContact().getRole() != null) {
+                metadata.add("Distributor Contact ",
+                        d.getDistributorContact().getRole().name());
+            }
+            if (d.getDistributorContact().getOrganisationName() != null) {
+                metadata.add("Distributor Organization Name ",
+                        d.getDistributorContact().getOrganisationName()
+                                .toString());
+            }
+        }
+        for (DigitalTransferOptions options
+                : distribution.getTransferOptions()) {
+            for (OnlineResource resource : options.getOnLines()) {
+                if (resource.getLinkage() != null) {
+                    metadata.add("TransferOptionsOnlineLinkage ",
+                            resource.getLinkage().toString());
+                }
+                if (resource.getProtocol() != null) {
+                    metadata.add("TransferOptionsOnlineProtocol ",
+                            resource.getProtocol());
+                }
+                if (resource.getApplicationProfile() != null) {
+                    metadata.add("TransferOptionsOnlineProfile ",
+                            resource.getApplicationProfile());
+                }
+                if (resource.getName() != null) {
+                    metadata.add("TransferOptionsOnlineName ",
+                            resource.getName());
+                }
+                if (resource.getDescription() != null) {
+                    metadata.add("TransferOptionsOnlineDescription ",
+                            resource.getDescription().toString());
+                }
+                if (resource.getFunction() != null) {
+                    metadata.add("TransferOptionsOnlineFunction ",
+                            resource.getFunction().name());
+                }
+            }
+        }
+    }
+
+    /**
+     * Adds every metadata date that has a date type under the "DateInfo "
+     * key, formatted as the date type name, a space, and the date.
+     * <p>
+     * The collection is iterated through its declared type instead of being
+     * cast to {@code ArrayList}.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataDateInfo(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        for (CitationDate c : defaultMetaData.getDateInfo()) {
+            if (c.getDateType() != null) {
+                metadata.add("DateInfo ",
+                        c.getDateType().name() + " " + c.getDate());
+            }
+        }
+    }
+
+    /**
+     * Adds the resource scope name of every metadata scope that has one.
+     * <p>
+     * Elements are type-checked before being narrowed to
+     * {@code DefaultMetadataScope}, and the collection is no longer cast to
+     * {@code ArrayList}.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataResourceScope(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        for (Object scope : defaultMetaData.getMetadataScopes()) {
+            if (!(scope instanceof DefaultMetadataScope)) {
+                continue;
+            }
+            DefaultMetadataScope d = (DefaultMetadataScope) scope;
+            if (d.getResourceScope() != null) {
+                metadata.add("MetaDataResourceScope ",
+                        d.getResourceScope().name());
+            }
+        }
+    }
+
+    /**
+     * Records the title of the parent metadata citation, if the metadata has
+     * a parent and that parent has a title.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataParentMetaDataTitle(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        Citation parent = defaultMetaData.getParentMetadata();
+        if (parent == null) {
+            return;
+        }
+        InternationalString parentTitle = parent.getTitle();
+        if (parentTitle != null) {
+            metadata.add("ParentMetaDataTitle", parentTitle.toString());
+        }
+    }
+
+    /**
+     * Records the code of the metadata identifier, if the metadata has one.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataIdetifierCode(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        Identifier metadataId = defaultMetaData.getMetadataIdentifier();
+        if (metadataId == null) {
+            return;
+        }
+        metadata.add("MetaDataIdentifierCode", metadataId.getCode());
+    }
+
+    /**
+     * Adds the title and edition of every metadata standard citation.
+     * <p>
+     * The collection is iterated through its declared type instead of being
+     * cast to {@code ArrayList}.
+     *
+     * @param metadata        Tika metadata object to populate
+     * @param defaultMetaData SIS metadata read from the data store
+     */
+    private void getMetaDataStandard(Metadata metadata,
+            DefaultMetadata defaultMetaData) {
+        for (Citation c : defaultMetaData.getMetadataStandards()) {
+            if (c.getTitle() != null) {
+                metadata.add("MetaDataStandardTitle ",
+                        c.getTitle().toString());
+            }
+            if (c.getEdition() != null) {
+                metadata.add("MetaDataStandardEdition ",
+                        c.getEdition().toString());
+            }
+        }
+    }
+}


Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/grib/GribParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/grib/GribParser.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/grib/GribParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/grib/GribParser.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.grib;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.File;
+import java.util.Collections;
+import java.util.Set;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import ucar.nc2.Attribute;
+import ucar.nc2.Dimension;
+import ucar.nc2.NetcdfFile;
+import ucar.nc2.Variable;
+import ucar.nc2.dataset.NetcdfDataset;
+
+public class GribParser extends AbstractParser {
+
+    /** Serial version UID. */
+    private static final long serialVersionUID = 7855458954474247655L;
+
+    /** MIME type string that parse() records as the document content type. */
+    public static final String GRIB_MIME_TYPE = "application/x-grib2";
+
+    /** Media types handled by this parser; returned by getSupportedTypes(). */
+    private final Set<MediaType> SUPPORTED_TYPES =
+            Collections.singleton(MediaType.application("x-grib2"));
+
+    /**
+     * Returns the media types handled by this parser.
+     *
+     * @param context parse context; not consulted by this implementation
+     * @return the singleton set stored in {@code SUPPORTED_TYPES}
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Parses a GRIB file through the NetCDF-Java library.
+     * <p>
+     * Global attributes are copied into {@code metadata}; dimensions and
+     * variables, with their attributes, are written as XHTML. The NetCDF
+     * file handle is closed and the temporary resources backing the stream
+     * are disposed before the method returns, whether or not parsing
+     * succeeded.
+     *
+     * @param stream   the document to parse
+     * @param handler  receiver of the generated XHTML events
+     * @param metadata metadata object to populate; its content type is set
+     *                 to {@link #GRIB_MIME_TYPE}
+     * @param context  parse context; not consulted by this method
+     * @throws IOException   if the stream cannot be spooled to a file
+     * @throws SAXException  if the content handler rejects an event
+     * @throws TikaException if the file cannot be read by NetCDF-Java, or if
+     *                       the temporary resources cannot be released
+     */
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+
+        //Set MIME type as grib2
+        metadata.set(Metadata.CONTENT_TYPE, GRIB_MIME_TYPE);
+
+        // getFile() may spool the stream into a temporary file that is
+        // tracked by tmp, so tmp has to be disposed once parsing is over.
+        TemporaryResources tmp = new TemporaryResources();
+        try {
+            TikaInputStream tis = TikaInputStream.get(stream, tmp);
+            File gribFile = tis.getFile();
+
+            try {
+                NetcdfFile ncFile = NetcdfDataset.openFile(
+                        gribFile.getAbsolutePath(), null);
+                try {
+                    writeGribContents(ncFile, handler, metadata);
+                } finally {
+                    ncFile.close();
+                }
+            } catch (IOException e) {
+                throw new TikaException("NetCDF parse error", e);
+            }
+        } finally {
+            tmp.dispose();
+        }
+    }
+
+    /**
+     * Copies the global attributes into the metadata and writes dimensions
+     * and variables as XHTML.
+     */
+    private void writeGribContents(NetcdfFile ncFile, ContentHandler handler,
+            Metadata metadata) throws SAXException {
+        // first parse out the set of global attributes
+        for (Attribute attr : ncFile.getGlobalAttributes()) {
+            Property property = resolveMetadataKey(attr.getFullName());
+            if (attr.getDataType().isString()) {
+                metadata.add(property, attr.getStringValue());
+            } else if (attr.getDataType().isNumeric()) {
+                // Numeric values are stored as their integer part.
+                int value = attr.getNumericValue().intValue();
+                metadata.add(property, String.valueOf(value));
+            }
+        }
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+
+        xhtml.startDocument();
+
+        xhtml.newline();
+        xhtml.startElement("ul");
+        xhtml.characters("dimensions:");
+        xhtml.newline();
+
+        for (Dimension dim : ncFile.getDimensions()) {
+            xhtml.element("li",
+                    dim.getFullName() + "=" + dim.getLength() + ";");
+            xhtml.newline();
+        }
+
+        // The variables list is opened inside the dimensions list; both
+        // lists are closed together after the last variable.
+        xhtml.startElement("ul");
+        xhtml.characters("variables:");
+        xhtml.newline();
+
+        for (Variable variable : ncFile.getVariables()) {
+            xhtml.element("p", String.valueOf(variable.getDataType())
+                    + variable.getNameAndDimensions() + ";");
+            for (Attribute element : variable.getAttributes()) {
+                xhtml.element("li", " :" + element + ";");
+                xhtml.newline();
+            }
+        }
+        xhtml.endElement("ul");
+        xhtml.endElement("ul");
+        xhtml.endDocument();
+    }
+
+    /**
+     * Maps a NetCDF attribute name to a Tika metadata property: "title"
+     * becomes the core title property, any other name becomes an internal
+     * text property with the same name.
+     *
+     * @param localName attribute name as reported by NetCDF-Java
+     * @return the metadata property to store the attribute value under
+     */
+    private Property resolveMetadataKey(String localName) {
+        return "title".equals(localName)
+                ? TikaCoreProperties.TITLE
+                : Property.internalText(localName);
+    }
+
+}
\ No newline at end of file

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/hdf/HDFParser.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.hdf;
+
+//JDK imports
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.netcdf.NetCDFParser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import ucar.nc2.Attribute;
+import ucar.nc2.Group;
+import ucar.nc2.NetcdfFile;
+
+/**
+ * Parser for HDF files.
+ * <p>
+ * Since the {@link NetCDFParser} depends on the <a
+ * href="http://www.unidata.ucar.edu/software/netcdf-java">NetCDF-Java</a>
+ * API, we are able to use it to parse HDF files as well. See <a href=
+ * "http://www.unidata.ucar.edu/software/netcdf-java/formats/FileTypes.html"
+ * >this link</a> for more information.
+ */
+public class HDFParser extends AbstractParser {
+
+    /** Serial version UID. */
+    private static final long serialVersionUID = 1091208208003437549L;
+
+    /** Media types handled by this parser; returned by getSupportedTypes(). */
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.singleton(MediaType.application("x-hdf"));
+
+    /**
+     * Returns the media types handled by this parser.
+     *
+     * @param context parse context; not consulted by this implementation
+     * @return the singleton set stored in {@code SUPPORTED_TYPES}
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Parses an HDF file through the NetCDF-Java library.
+     * <p>
+     * The whole stream is read into memory, opened as an in-memory NetCDF
+     * file, and the attributes of every group are copied into
+     * {@code metadata}. The generated XHTML document is empty. The NetCDF
+     * file handle is closed before the method returns, whether or not the
+     * attributes could be read.
+     *
+     * @param stream   the document to parse
+     * @param handler  receiver of the generated XHTML events
+     * @param metadata metadata object to populate; its resource name, if
+     *                 present, is used as the in-memory file name
+     * @param context  parse context; not consulted by this method
+     * @throws IOException   if the stream cannot be read
+     * @throws SAXException  if the content handler rejects an event
+     * @throws TikaException if the data cannot be read by NetCDF-Java
+     */
+    public void parse(InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        ByteArrayOutputStream os = new ByteArrayOutputStream();
+        IOUtils.copy(stream, os);
+
+        String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+        if (name == null) {
+            name = "";
+        }
+        try {
+            NetcdfFile ncFile =
+                    NetcdfFile.openInMemory(name, os.toByteArray());
+            try {
+                unravelStringMet(ncFile, null, metadata);
+            } finally {
+                ncFile.close();
+            }
+        } catch (IOException e) {
+            throw new TikaException("HDF parse error", e);
+        }
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+    protected void unravelStringMet(NetcdfFile ncFile, Group group, Metadata 
met) {
+        if (group == null) {
+            group = ncFile.getRootGroup();
+        }
+
+        // get file type
+        met.set("File-Type-Description", ncFile.getFileTypeDescription());
+        // unravel its string attrs
+        for (Attribute attribute : group.getAttributes()) {
+            if (attribute.isString()) {
+                met.add(attribute.getFullName(), attribute.getStringValue());
+            } else {
+                // try and cast its value to a string
+                met.add(attribute.getFullName(), String.valueOf(attribute
+                        .getNumericValue()));
+            }
+        }
+
+        for (Group g : group.getGroups()) {
+            unravelStringMet(ncFile, g, met);
+        }
+    }
+
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.isatab;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.detect.AutoDetectReader;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.SAXException;
+
+public class ISATabUtils {
+       
+       private static final ServiceLoader LOADER = new 
ServiceLoader(ISATabUtils.class.getClassLoader());
+       
+       /**
+        * INVESTIGATION
+        */
+       
+       // Investigation section.
+       private static final String[] sections = {
+                       "ONTOLOGY SOURCE REFERENCE",
+                       "INVESTIGATION", 
+                       "INVESTIGATION PUBLICATIONS", 
+                       "INVESTIGATION CONTACTS"
+               };
+       
+       // STUDY section (inside the Study section)
+       private static final String studySectionField = "STUDY";
+       
+       // Study File Name (inside the STUDY section)
+       private static final String studyFileNameField = "Study File Name";
+       
+       public static void parseInvestigation(InputStream stream, 
XHTMLContentHandler handler, Metadata metadata, ParseContext context, String 
studyFileName) throws IOException, TikaException, SAXException {
+               // Automatically detect the character encoding
+               try (AutoDetectReader reader = new AutoDetectReader(new 
CloseShieldInputStream(stream),
+                               metadata, context.get(ServiceLoader.class, 
LOADER))) {
+                       extractMetadata(reader, metadata, studyFileName);
+               }
+       }
+       
+       public static void parseInvestigation(InputStream stream, 
XHTMLContentHandler handler, Metadata metadata, ParseContext context) throws 
IOException, TikaException, SAXException {
+               parseInvestigation(stream, handler, metadata, context, null);
+       }
+       
+       public static void parseStudy(InputStream stream, XHTMLContentHandler 
xhtml, Metadata metadata, ParseContext context) throws IOException, 
TikaException, SAXException {
+               TikaInputStream tis = TikaInputStream.get(stream);
+               // Automatically detect the character encoding
+
+               try (AutoDetectReader reader = new AutoDetectReader(new 
CloseShieldInputStream(tis),
+                               metadata, context.get(ServiceLoader.class, 
LOADER));
+                        CSVParser csvParser = new CSVParser(reader, 
CSVFormat.TDF)) {
+                       Iterator<CSVRecord> iterator = csvParser.iterator();
+
+                       xhtml.startElement("table");
+
+                       xhtml.startElement("thead");
+                       if (iterator.hasNext()) {
+                               CSVRecord record = iterator.next();
+                               for (int i = 0; i < record.size(); i++) {
+                                       xhtml.startElement("th");
+                                       xhtml.characters(record.get(i));
+                                       xhtml.endElement("th");
+                               }
+                       }
+                       xhtml.endElement("thead");
+
+                       xhtml.startElement("tbody");
+                       while (iterator.hasNext()) {
+                               CSVRecord record = iterator.next();
+                               xhtml.startElement("tr");
+                               for (int j = 0; j < record.size(); j++) {
+                                       xhtml.startElement("td");
+                                       xhtml.characters(record.get(j));
+                                       xhtml.endElement("td");
+                               }
+                               xhtml.endElement("tr");
+                       }
+                       xhtml.endElement("tbody");
+
+                       xhtml.endElement("table");
+               }
+       }
+       
+       public static void parseAssay(InputStream stream, XHTMLContentHandler 
xhtml, Metadata metadata, ParseContext context) throws IOException, 
TikaException, SAXException {
+               TikaInputStream tis = TikaInputStream.get(stream);
+               
+               // Automatically detect the character encoding
+
+               try (AutoDetectReader reader = new AutoDetectReader(new 
CloseShieldInputStream(tis),
+                               metadata, context.get(ServiceLoader.class, 
LOADER));
+                        CSVParser csvParser = new CSVParser(reader, 
CSVFormat.TDF)) {
+                       xhtml.startElement("table");
+
+                       Iterator<CSVRecord> iterator = csvParser.iterator();
+
+                       xhtml.startElement("thead");
+                       if (iterator.hasNext()) {
+                               CSVRecord record = iterator.next();
+                               for (int i = 0; i < record.size(); i++) {
+                                       xhtml.startElement("th");
+                                       xhtml.characters(record.get(i));
+                                       xhtml.endElement("th");
+                               }
+                       }
+                       xhtml.endElement("thead");
+
+                       xhtml.startElement("tbody");
+                       while (iterator.hasNext()) {
+                               CSVRecord record = iterator.next();
+                               xhtml.startElement("tr");
+                               for (int j = 0; j < record.size(); j++) {
+                                       xhtml.startElement("td");
+                                       xhtml.characters(record.get(j));
+                                       xhtml.endElement("td");
+                               }
+                               xhtml.endElement("tr");
+                       }
+                       xhtml.endElement("tbody");
+
+                       xhtml.endElement("table");
+               }
+       }
+       
+       private static void extractMetadata(Reader reader, Metadata metadata, 
String studyFileName) throws IOException {
+               boolean investigationSection = false;
+               boolean studySection = false;
+               boolean studyTarget = false;
+                               
+               Map<String, String> map = new HashMap<String, String>();
+
+               try (CSVParser csvParser = new CSVParser(reader, 
CSVFormat.TDF)) {
+                       Iterator<CSVRecord> iterator = csvParser.iterator();
+
+                       while (iterator.hasNext()) {
+                               CSVRecord record = iterator.next();
+                               String field = record.get(0);
+                               if 
((field.toUpperCase(Locale.ENGLISH).equals(field)) && (record.size() == 1)) {
+                                       investigationSection = 
Arrays.asList(sections).contains(field);
+                                       studySection = (studyFileName != null) 
&& (field.equals(studySectionField));
+                               } else {
+                                       if (investigationSection) {
+                                               addMetadata(field, record, 
metadata);
+                                       } else if (studySection) {
+                                               if (studyTarget) {
+                                                       break;
+                                               }
+                                               String value = record.get(1);
+                                               map.put(field, value);
+                                               studyTarget = 
(field.equals(studyFileNameField)) && (value.equals(studyFileName));
+                                               if (studyTarget) {
+                                                       mapStudyToMetadata(map, 
metadata);
+                                                       studySection = false;
+                                               }
+                                       } else if (studyTarget) {
+                                               addMetadata(field, record, 
metadata);
+                                       }
+                               }
+                       }
+               } catch (IOException ioe) {
+                       throw ioe;
+               }
+       }
+       
+       private static void addMetadata(String field, CSVRecord record, 
Metadata metadata) {
+               if ((record ==null) || (record.size() <= 1)) {
+                       return;
+               }
+               
+               for (int i = 1; i < record.size(); i++) {
+                       metadata.add(field, record.get(i));
+               }
+       }
+       
+       private static void mapStudyToMetadata(Map<String, String> map, 
Metadata metadata) {
+               for (Map.Entry<String, String> entry : map.entrySet()) {
+                       metadata.add(entry.getKey(), entry.getValue());
+               }
+       }
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.isatab;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for ISA-Tab archives. The stream handed to
+ * {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)} is
+ * treated as a study file; the investigation file ({@code i_*.txt}) and the
+ * assay files are looked up in the archive folder.
+ */
+public class ISArchiveParser implements Parser {
+
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = 3640809327541300229L;
+
+    private final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("x-isatab"));
+
+    // Metadata key under which the investigation file lists the assay files of a study.
+    private static final String studyAssayFileNameField = "Study Assay File Name";
+
+    // Selects the investigation file(s) of an archive folder.
+    private static final FilenameFilter INVESTIGATION_FILTER = new FilenameFilter() {
+        @Override
+        public boolean accept(File dir, String name) {
+            return name.matches("i_.+\\.txt");
+        }
+    };
+
+    // Archive folder given at construction time, with a trailing separator; null if none was given.
+    private final String location;
+
+    /**
+     * Default constructor. The archive folder is derived on every parse from
+     * the file that backs the input stream.
+     */
+    public ISArchiveParser() {
+        this(null);
+    }
+
+    /**
+     * Constructor that accepts the pathname of ISArchive folder.
+     * @param location pathname of ISArchive folder including ISA-Tab files
+     */
+    public ISArchiveParser(String location) {
+        if (location != null && !location.endsWith(File.separator)) {
+            location += File.separator;
+        }
+        this.location = location;
+    }
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Parses a study file together with the investigation and assay files of
+     * its archive folder.
+     *
+     * @param stream   content of the study file
+     * @param handler  receives the XHTML output
+     * @param metadata receives the fields of the investigation file
+     * @param context  parse context
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
+            ParseContext context) throws IOException, SAXException, TikaException {
+
+        TikaInputStream tis = TikaInputStream.get(stream);
+        File studyFile = tis.getFile();
+
+        // Kept in locals rather than fields: the folder derived from one file
+        // must not be reused for the next file parsed by the same instance.
+        String archiveLocation = this.location;
+        if (archiveLocation == null) {
+            archiveLocation = studyFile.getParent() + File.separator;
+        }
+        String studyFileName = studyFile.getName();
+
+        String[] investigationList = new File(archiveLocation).list(INVESTIGATION_FILTER);
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        parseInvestigation(investigationList, archiveLocation, studyFileName, xhtml, metadata, context);
+        // Read from tis, not from the raw stream: getFile() may already have
+        // consumed the raw stream while spooling it to a file.
+        parseStudy(tis, xhtml, metadata, context);
+        parseAssay(archiveLocation, xhtml, metadata, context);
+
+        xhtml.endDocument();
+    }
+
+    /**
+     * Reads the investigation file of the archive folder into the metadata and
+     * writes the investigation heading. Nothing is done unless exactly one
+     * investigation file was found.
+     */
+    private void parseInvestigation(String[] investigationList, String archiveLocation, String studyFileName,
+            XHTMLContentHandler xhtml, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+        if ((investigationList == null) || (investigationList.length == 0)) {
+            // TODO warning
+            return;
+        }
+        if (investigationList.length > 1) {
+            // TODO warning
+            return;
+        }
+
+        String investigation = investigationList[0]; // TODO add to metadata?
+
+        // ISATabUtils shields the stream from being closed, so it is closed here.
+        try (InputStream investigationStream = TikaInputStream.get(new File(archiveLocation + investigation))) {
+            ISATabUtils.parseInvestigation(investigationStream, xhtml, metadata, context, studyFileName);
+        }
+
+        xhtml.element("h1", "INVESTIGATION " + metadata.get("Investigation Identifier"));
+    }
+
+    /**
+     * Writes the study heading followed by the study file as a table.
+     */
+    private void parseStudy(InputStream stream, XHTMLContentHandler xhtml, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+        xhtml.element("h2", "STUDY " + metadata.get("Study Identifier"));
+
+        ISATabUtils.parseStudy(stream, xhtml, metadata, context);
+    }
+
+    /**
+     * Writes one {@code div} per assay file listed in the metadata, each with
+     * a heading and the assay file as a table.
+     */
+    private void parseAssay(String archiveLocation, XHTMLContentHandler xhtml, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+        for (String assayFileName : metadata.getValues(studyAssayFileNameField)) {
+            xhtml.startElement("div");
+            xhtml.element("h3", "ASSAY " + assayFileName);
+            // NOTE(review): assayFileName comes from the parsed investigation file and is
+            // appended to the folder unchecked - confirm whether names containing path
+            // separators or ".." should be rejected.
+            try (InputStream assayStream = TikaInputStream.get(new File(archiveLocation + assayFileName))) {
+                ISATabUtils.parseAssay(assayStream, xhtml, metadata, context);
+            }
+            xhtml.endElement("div");
+        }
+    }
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/mat/MatParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/mat/MatParser.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/mat/MatParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/mat/MatParser.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mat;
+
+//JDK imports
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+import java.util.Map;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+//JMatIO imports
+import com.jmatio.io.MatFileHeader;
+import com.jmatio.io.MatFileReader;
+import com.jmatio.types.MLArray;
+import com.jmatio.types.MLStructure;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+
+public class MatParser extends AbstractParser {
+
+    public static final String MATLAB_MIME_TYPE =
+            "application/x-matlab-data";
+
+    private final Set<MediaType> SUPPORTED_TYPES =
+            Collections.singleton(MediaType.application("x-matlab-data"));
+
+    public Set<MediaType> getSupportedTypes(ParseContext context){
+        return SUPPORTED_TYPES;
+    }
+
+    public void parse(InputStream stream, ContentHandler handler, Metadata 
metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+
+        //Set MIME type as Matlab
+        metadata.set(Metadata.CONTENT_TYPE, MATLAB_MIME_TYPE);
+
+        try {
+            // Use TIS so we can spool a temp file for parsing.
+            TikaInputStream tis = TikaInputStream.get(stream);
+
+            //Extract information from header file
+            MatFileReader mfr = new MatFileReader(tis.getFile()); //input .mat 
file
+            MatFileHeader hdr = mfr.getMatFileHeader(); //.mat header 
information
+
+            // Example header: "MATLAB 5.0 MAT-file, Platform: MACI64, Created 
on: Sun Mar  2 23:41:57 2014"
+            String[] parts = hdr.getDescription().split(","); // Break header 
information into its parts
+
+            if (parts[2].contains("Created")) {
+                int lastIndex1 = parts[2].lastIndexOf("Created on:");
+                String dateCreated = parts[2].substring(lastIndex1 + "Created 
on:".length()).trim();
+                metadata.set("createdOn", dateCreated);
+            }
+
+            if (parts[1].contains("Platform")) {
+                int lastIndex2 = parts[1].lastIndexOf("Platform:");
+                String platform = parts[1].substring(lastIndex2 + 
"Platform:".length()).trim();
+                metadata.set("platform" , platform);
+            }
+
+            if (parts[0].contains("MATLAB")) {
+                metadata.set("fileType", parts[0]);
+            }
+
+            // Get endian indicator from header file
+            String endianBytes = new String(hdr.getEndianIndicator(), UTF_8); 
// Retrieve endian bytes and convert to string
+            String endianCode = String.valueOf(endianBytes.toCharArray()); // 
Convert bytes to characters to string
+            metadata.set("endian", endianCode);
+
+            //Text output      
+            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, 
metadata);
+            xhtml.startDocument();
+            xhtml.newline();
+            //Loop through each variable
+            for (Map.Entry<String, MLArray> entry : 
mfr.getContent().entrySet()) {
+                String varName = entry.getKey();
+                MLArray varData = entry.getValue();
+
+                xhtml.element("p", varName + ":" + String.valueOf(varData));
+
+                // If the variable is a structure, extract variable info from 
structure
+                if (varData.isStruct()){
+                    MLStructure mlStructure = (MLStructure) 
mfr.getMLArray(varName);
+                    xhtml.startElement("ul");
+                    xhtml.newline();
+                    for (MLArray element : mlStructure.getAllFields()){
+                        xhtml.startElement("li");
+                        xhtml.characters(String.valueOf(element));
+
+                        // If there is an embedded structure, extract variable 
info.
+                        if (element.isStruct()){
+                            xhtml.startElement("ul");
+                            // Should this actually be a recursive call?
+                            xhtml.element("li", element.contentToString());
+                            xhtml.endElement("ul");
+                        }
+
+                        xhtml.endElement("li");
+                    }
+                    xhtml.endElement("ul");
+                }
+            }
+            xhtml.endDocument();
+        } catch (IOException e) {
+            throw new TikaException("Error parsing Matlab file with 
MatParser", e);
+        }
+    }
+}
\ No newline at end of file

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.netcdf;
+
+//JDK imports
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+import java.util.List;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import ucar.nc2.Attribute;
+import ucar.nc2.NetcdfFile;
+import ucar.nc2.Variable;
+import ucar.nc2.Dimension;
+
+/**
+ * A {@link Parser} for <a
+ * href="http://www.unidata.ucar.edu/software/netcdf/index.html";>NetCDF</a>
+ * files using the UCAR, MIT-licensed <a
+ * href="http://www.unidata.ucar.edu/software/netcdf-java/";>NetCDF for Java</a>
+ * API.
+ */
+public class NetCDFParser extends AbstractParser {
+
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = -5940938274907708665L;
+
+    private final Set<MediaType> SUPPORTED_TYPES =
+            Collections.singleton(MediaType.application("x-netcdf"));
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see
+     * org.apache.tika.parser.Parser#getSupportedTypes(org.apache.tika.parser
+     * .ParseContext)
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+     * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+     * org.apache.tika.parser.ParseContext)
+     */
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws 
IOException,
+            SAXException, TikaException {
+
+        TikaInputStream tis = TikaInputStream.get(stream, new 
TemporaryResources());
+        try {
+            NetcdfFile ncFile = 
NetcdfFile.open(tis.getFile().getAbsolutePath());
+            metadata.set("File-Type-Description", 
ncFile.getFileTypeDescription());
+            // first parse out the set of global attributes
+            for (Attribute attr : ncFile.getGlobalAttributes()) {
+                Property property = resolveMetadataKey(attr.getFullName());
+                if (attr.getDataType().isString()) {
+                    metadata.add(property, attr.getStringValue());
+                } else if (attr.getDataType().isNumeric()) {
+                    int value = attr.getNumericValue().intValue();
+                    metadata.add(property, String.valueOf(value));
+                }
+            }
+
+
+            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, 
metadata);
+            xhtml.startDocument();
+            xhtml.newline();
+            xhtml.element("h1", "dimensions");
+            xhtml.startElement("ul");
+            xhtml.newline();
+            for (Dimension dim : ncFile.getDimensions()) {
+                xhtml.element("li", dim.getFullName() + " = " + 
dim.getLength());
+            }
+            xhtml.endElement("ul");
+
+            xhtml.element("h1", "variables");
+            xhtml.startElement("ul");
+            xhtml.newline();
+            for (Variable var : ncFile.getVariables()) {
+                xhtml.startElement("li");
+                xhtml.characters(var.getDataType() + " " + 
var.getNameAndDimensions());
+                xhtml.newline();
+                List<Attribute> attributes = var.getAttributes();
+                if (!attributes.isEmpty()) {
+                    xhtml.startElement("ul");
+                    for (Attribute element : attributes) {
+                        xhtml.element("li", element.toString());
+                    }
+                    xhtml.endElement("ul");
+                }
+                xhtml.endElement("li");
+            }
+            xhtml.endElement("ul");
+
+            xhtml.endDocument();
+
+        } catch (IOException e) {
+            throw new TikaException("NetCDF parse error", e);
+        }
+    }
+
+    private Property resolveMetadataKey(String localName) {
+        if ("title".equals(localName)) {
+            return TikaCoreProperties.TITLE;
+        }
+        return Property.internalText(localName);
+    }
+}
\ No newline at end of file

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/pot/PooledTimeSeriesParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/pot/PooledTimeSeriesParser.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/pot/PooledTimeSeriesParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/java/org/apache/tika/parser/pot/PooledTimeSeriesParser.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.pot;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.external.ExternalParser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.BufferedReader;
+import java.util.logging.Logger;
+import org.apache.commons.exec.CommandLine;
+import org.apache.commons.exec.DefaultExecutor;
+import org.apache.commons.exec.ExecuteWatchdog;
+import org.apache.commons.exec.PumpStreamHandler;
+import org.apache.commons.exec.environment.EnvironmentUtils;
+import org.xml.sax.helpers.AttributesImpl;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+public class PooledTimeSeriesParser extends AbstractParser {
+
+  private static final long serialVersionUID = -2855917932512164988L;
+  private static final Set<MediaType> SUPPORTED_TYPES = Collections
+      .unmodifiableSet(new HashSet<MediaType>(Arrays.asList(new MediaType[] {
+          MediaType.video("avi"), MediaType.video("mp4")
+      // TODO: Add all supported video types
+          })));
+
+  private static final Logger LOG = 
Logger.getLogger(PooledTimeSeriesParser.class.getName());
+
+  public boolean isAvailable() {
+    return ExternalParser.check(
+        new String[] { "pooled-time-series", "--help" }, -1);
+  }
+
+  /**
+   * Returns the set of media types supported by this parser when used with the
+   * given parse context.
+   *
+   * @param context
+   *          parse context
+   * @return immutable set of media types
+   * @since Apache Tika 0.7
+   */
+  @Override
+  public Set<MediaType> getSupportedTypes(ParseContext context) {
+    return SUPPORTED_TYPES;
+  }
+
+  /**
+   * Parses a document stream into a sequence of XHTML SAX events. Fills in
+   * related document metadata in the given metadata object.
+   * <p>
+   * The given document stream is consumed but not closed by this method. The
+   * responsibility to close the stream remains on the caller.
+   * <p>
+   * Information about the parsing context can be passed in the context
+   * parameter. See the parser implementations for the kinds of context
+   * information they expect.
+   *
+   * @param stream
+   *          the document stream (input)
+   * @param handler
+   *          handler for the XHTML SAX events (output)
+   * @param metadata
+   *          document metadata (input and output)
+   * @param context
+   *          parse context
+   * @throws IOException
+   *           if the document stream could not be read
+   * @throws SAXException
+   *           if the SAX events could not be processed
+   * @throws TikaException
+   *           if the document could not be parsed
+   * @since Apache Tika 0.5
+   */
+  @Override
+  public void parse(InputStream stream, ContentHandler handler,
+      Metadata metadata, ParseContext context) throws IOException,
+      SAXException, TikaException {
+
+    if (!isAvailable()) {
+      LOG.warning(
+          "PooledTimeSeries not installed!");
+      return;
+    }
+
+    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+
+    TemporaryResources tmp = new TemporaryResources();
+    File output = null;
+    try {
+      TikaInputStream tikaStream = TikaInputStream.get(stream, tmp);
+      File input = tikaStream.getFile();
+      String cmdOutput = computePoT(input);
+      FileInputStream ofStream = new FileInputStream(new File(
+          input.getAbsoluteFile() + ".of.txt"));
+      FileInputStream ogStream = new FileInputStream(new File(
+          input.getAbsoluteFile() + ".hog.txt"));
+      extractHeaderOutput(ofStream, metadata, "of");
+      extractHeaderOutput(ogStream, metadata, "og");
+      xhtml.startDocument();
+      doExtract(ofStream, xhtml, "Histogram of Optical Flows (HOF)",
+          metadata.get("of_frames"), metadata.get("of_vecSize"));
+      doExtract(ogStream, xhtml, "Histogram of Oriented Gradients (HOG)",
+          metadata.get("og_frames"), metadata.get("og_vecSize"));
+      xhtml.endDocument();
+
+    } finally {
+      tmp.dispose();
+      if (output != null) {
+        output.delete();
+      }
+    }
+  }
+
+  private String computePoT(File input)
+      throws IOException, TikaException {
+
+    CommandLine cmdLine = new CommandLine("pooled-time-series");
+    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+    cmdLine.addArgument("-f");
+    cmdLine.addArgument(input.getAbsolutePath());
+    LOG.fine("Executing: " + cmdLine);
+    DefaultExecutor exec = new DefaultExecutor();
+    exec.setExitValue(0);
+    ExecuteWatchdog watchdog = new ExecuteWatchdog(60000);
+    exec.setWatchdog(watchdog);
+    PumpStreamHandler streamHandler = new PumpStreamHandler(outputStream);
+    exec.setStreamHandler(streamHandler);
+    int exitValue = exec
+        .execute(cmdLine, EnvironmentUtils.getProcEnvironment());
+    return outputStream.toString("UTF-8");
+
+  }
+
+  /**
+   * Reads the contents of the given stream and write it to the given XHTML
+   * content handler. The stream is closed once fully processed.
+   *
+   * @param stream
+   *          Stream where is the result of ocr
+   * @param xhtml
+   *          XHTML content handler
+   * @param tableTitle
+   *          The name of the matrix/table to display.
+   * @param frames
+   *          Number of frames read from the video.
+   * @param vecSize
+   *          Size of the OF or HOG vector.
+   * @throws SAXException
+   *           if the XHTML SAX events could not be handled
+   * @throws IOException
+   *           if an input error occurred
+   */
+  private void doExtract(InputStream stream, XHTMLContentHandler xhtml,
+      String tableTitle, String frames, String vecSize) throws SAXException,
+      IOException {
+    BufferedReader reader = new BufferedReader(new InputStreamReader(stream,
+        UTF_8));
+    String line = null;
+    AttributesImpl attributes = new AttributesImpl();
+    attributes.addAttribute("", "", "rows", "CDATA", frames);
+    attributes.addAttribute("", "", "cols", "CDATA", vecSize);
+
+    xhtml.startElement("h3");
+    xhtml.characters(tableTitle);
+    xhtml.endElement("h3");
+    xhtml.startElement("table", attributes);
+    while ((line = reader.readLine()) != null) {
+      xhtml.startElement("tr");
+      for (String val : line.split(" ")) {
+        xhtml.startElement("td");
+        xhtml.characters(val);
+        xhtml.endElement("td");
+      }
+      xhtml.endElement("tr");
+    }
+    xhtml.endElement("table");
+  }
+
+  private void extractHeaderOutput(InputStream stream, Metadata metadata,
+      String prefix) throws IOException {
+    BufferedReader reader = new BufferedReader(new InputStreamReader(stream,
+        UTF_8));
+    String line = reader.readLine();
+    String[] firstLine = line.split(" ");
+    String frames = firstLine[0];
+    String vecSize = firstLine[1];
+
+    if (prefix == null) {
+      prefix = "";
+    }
+    metadata.add(prefix + "_frames", frames);
+    metadata.add(prefix + "_vecSize", vecSize);
+  }
+
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,28 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+
+#org.apache.tika.parser.ctakes.CTAKESParser
+org.apache.tika.parser.dif.DIFParser
+org.apache.tika.parser.gdal.GDALParser
+org.apache.tika.parser.geo.topic.GeoParser
+org.apache.tika.parser.geoinfo.GeographicInformationParser
+org.apache.tika.parser.grib.GribParser
+org.apache.tika.parser.hdf.HDFParser
+org.apache.tika.parser.isatab.ISArchiveParser
+org.apache.tika.parser.mat.MatParser
+org.apache.tika.parser.netcdf.NetCDFParser
+org.apache.tika.parser.pot.PooledTimeSeriesParser
+#org.apache.tika.parser.envi.EnviHeaderParser

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.dif;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+import java.io.InputStream;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Test case for the {@link DIFParser}, run against the
+ * {@code Zamora2010.dif} test document.
+ */
+public class DIFParserTest extends TikaTest {
+
+    /**
+     * Parses the sample DIF document and checks two metadata entries as well
+     * as the title and bounding-box lines of the extracted text.
+     *
+     * @throws Exception if the document cannot be read or parsed
+     */
+    @Test
+    public void testDifMetadata() throws Exception {
+        Parser parser = new DIFParser();
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = DIFParser.class.getResourceAsStream(
+                "/test-documents/Zamora2010.dif")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        // assertEquals takes (expected, actual); keeping that order makes the
+        // failure message report the right value as "expected".
+        assertEquals("00794186-48f9-11e3-9dcb-00c0f03d5b7c",
+                metadata.get("DIF-Entry_ID"));
+        assertEquals("ACADIS IDN DIF", metadata.get("DIF-Metadata_Name"));
+
+        String content = handler.toString();
+        assertContains("Title: Zamora 2010 Using Sediment Geochemistry",
+                content);
+        assertContains("Southernmost_Latitude : 78.833", content);
+        assertContains("Northernmost_Latitude : 79.016", content);
+        assertContains("Westernmost_Longitude : 11.64", content);
+        assertContains("Easternmost_Longitude : 13.34", content);
+    }
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.envi;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.ToXMLContentHandler;
+import org.junit.Test;
+
+/**
+ * Test cases to exercise the {@link EnviHeaderParser}.
+ */
+public class EnviHeaderParserTest {
+    /**
+     * Parses the sample ENVI header and checks that selected header lines and
+     * the content type appear in the generated XML.
+     *
+     * @throws Exception if the test file cannot be read or parsed
+     */
+    @Test
+    public void testParseGlobalMetadata() throws Exception {
+        // No Java 1.5 guard: this class uses try-with-resources, so it cannot
+        // be compiled or loaded on a pre-Java 7 runtime in the first place.
+        Parser parser = new EnviHeaderParser();
+        ToXMLContentHandler handler = new ToXMLContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = EnviHeaderParser.class.getResourceAsStream(
+                "/test-documents/envi_test_header.hdr")) {
+            assertNotNull("Test ENVI file not found", stream);
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        // Check content of test file
+        String content = handler.toString();
+        assertContains("<body><p>ENVI</p>", content);
+        assertContains("<p>samples = 2400</p>", content);
+        assertContains("<p>lines   = 2400</p>", content);
+        // Each expected string is a single literal; it must not be split
+        // across source lines.
+        assertContains("<p>map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}</p>", content);
+        assertContains("content=\"application/envi.hdr\"", content);
+        assertContains("projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}", content);
+    }
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java?rev=1725014&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-scientific-parser-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
 Sat Jan 16 18:23:01 2016
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.gdal;
+
+//JDK imports
+
+import java.io.IOException;
+import java.io.InputStream;
+
+
+//Tika imports
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.external.ExternalParser;
+import org.apache.tika.sax.BodyContentHandler;
+
+//Junit imports
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+import static org.junit.Assert.fail;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assume.assumeTrue;
+
+/**
+ * Test harness for the GDAL parser.
+ */
+public class TestGDALParser extends TikaTest {
+
+    private boolean canRun() {
+        String[] checkCmd = {"gdalinfo"};
+        // If GDAL is not on the path, do not run the test.
+        return ExternalParser.check(checkCmd);
+    }
+
+  @Test
+  public void testParseBasicInfo() {
+    assumeTrue(canRun());
+    final String expectedDriver = "netCDF/Network Common Data Format";
+    final String expectedUpperRight = "512.0,    0.0";
+    final String expectedUpperLeft = "0.0,    0.0";
+    final String expectedLowerLeft = "0.0,  512.0";
+    final String expectedLowerRight = "512.0,  512.0";
+    final String expectedCoordinateSystem = "`'";
+    final String expectedSize = "512, 512";
+
+    GDALParser parser = new GDALParser();
+    InputStream stream = TestGDALParser.class
+        
.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
+    Metadata met = new Metadata();
+    BodyContentHandler handler = new BodyContentHandler();
+    try {
+      parser.parse(stream, handler, met, new ParseContext());
+    } catch (Exception e) {
+      e.printStackTrace();
+      fail(e.getMessage());
+    }
+
+    assertNotNull(met);
+    assertNotNull(met.get("Driver"));
+    assertEquals(expectedDriver, met.get("Driver"));
+    assumeTrue(met.get("Files") != null);
+    assertNotNull(met.get("Coordinate System"));
+    assertEquals(expectedCoordinateSystem, met.get("Coordinate System"));
+    assertNotNull(met.get("Size"));
+    assertEquals(expectedSize, met.get("Size"));
+    assertNotNull(met.get("Upper Right"));
+    assertEquals(expectedUpperRight, met.get("Upper Right"));
+    assertNotNull(met.get("Upper Left"));
+    assertEquals(expectedUpperLeft, met.get("Upper Left"));
+    assertNotNull(met.get("Upper Right"));
+    assertEquals(expectedLowerRight, met.get("Lower Right"));
+    assertNotNull(met.get("Upper Right"));
+    assertEquals(expectedLowerLeft, met.get("Lower Left"));
+
+  }
+
+    @Test
+    public void testParseMetadata() {
+        assumeTrue(canRun());
+        final String expectedNcInst = "NCAR (National Center for Atmospheric 
Research, Boulder, CO, USA)";
+        final String expectedModelNameEnglish = "NCAR CCSM";
+        final String expectedProgramId = "Source file unknown Version unknown 
Date unknown";
+        final String expectedProjectId = "IPCC Fourth Assessment";
+        final String expectedRealization = "1";
+        final String expectedTitle = "model output prepared for IPCC AR4";
+        final String expectedSub8Name = "\":ua";
+        final String expectedSub8Desc = "[1x17x128x256] eastward_wind (32-bit 
floating-point)";
+
+        GDALParser parser = new GDALParser();
+        InputStream stream = TestGDALParser.class
+                
.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
+        Metadata met = new Metadata();
+        BodyContentHandler handler = new BodyContentHandler();
+        try {
+            parser.parse(stream, handler, met, new ParseContext());
+            assertNotNull(met);
+            assertNotNull(met.get("NC_GLOBAL#institution"));
+            assertEquals(expectedNcInst, met.get("NC_GLOBAL#institution"));
+            assertNotNull(met.get("NC_GLOBAL#model_name_english"));
+            assertEquals(expectedModelNameEnglish,
+                    met.get("NC_GLOBAL#model_name_english"));
+            assertNotNull(met.get("NC_GLOBAL#prg_ID"));
+            assertEquals(expectedProgramId, met.get("NC_GLOBAL#prg_ID"));
+            assertNotNull(met.get("NC_GLOBAL#prg_ID"));
+            assertEquals(expectedProgramId, met.get("NC_GLOBAL#prg_ID"));
+            assertNotNull(met.get("NC_GLOBAL#project_id"));
+            assertEquals(expectedProjectId, met.get("NC_GLOBAL#project_id"));
+            assertNotNull(met.get("NC_GLOBAL#realization"));
+            assertEquals(expectedRealization, 
met.get("NC_GLOBAL#realization"));
+            assertNotNull(met.get("NC_GLOBAL#title"));
+            assertEquals(expectedTitle, met.get("NC_GLOBAL#title"));
+            assertNotNull(met.get("SUBDATASET_8_NAME"));
+            
assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name));
+            assertNotNull(met.get("SUBDATASET_8_DESC"));
+            assertEquals(expectedSub8Desc, met.get("SUBDATASET_8_DESC"));
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail(e.getMessage());
+        }
+    }
+
+    @Test
+    public void testParseFITS() {
+        String fitsFilename = "/test-documents/WFPC2u5780205r_c0fx.fits";
+
+        assumeTrue(canRun());
+        // If the exit code is 1 (meaning FITS isn't supported by the 
installed version of gdalinfo, don't run this test.
+        String[] fitsCommand = {"gdalinfo", 
TestGDALParser.class.getResource(fitsFilename).getPath()};
+        assumeTrue(ExternalParser.check(fitsCommand, 1));
+
+        String expectedAllgMin = "-7.319537E1";
+        String expectedAtodcorr = "COMPLETE";
+        String expectedAtodfile = "uref$dbu1405iu.r1h";
+        String expectedCalVersion = "                        ";
+        String expectedCalibDef = "1466";
+
+        GDALParser parser = new GDALParser();
+        InputStream stream = TestGDALParser.class
+                .getResourceAsStream(fitsFilename);
+        Metadata met = new Metadata();
+        BodyContentHandler handler = new BodyContentHandler();
+        try {
+            parser.parse(stream, handler, met, new ParseContext());
+            assertNotNull(met);
+            assertNotNull(met.get("ALLG-MIN"));
+            assertEquals(expectedAllgMin, met.get("ALLG-MIN"));
+            assertNotNull(met.get("ATODCORR"));
+            assertEquals(expectedAtodcorr, met.get("ATODCORR"));
+            assertNotNull(met.get("ATODFILE"));
+            assertEquals(expectedAtodfile, met.get("ATODFILE"));
+            assertNotNull(met.get("CAL_VER"));
+            assertEquals(expectedCalVersion, met.get("CAL_VER"));
+            assertNotNull(met.get("CALIBDEF"));
+            assertEquals(expectedCalibDef, met.get("CALIBDEF"));
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail(e.getMessage());
+        }
+    }
+}

svn commit: r1725014 [23/28] - in /tika/branches/2.x: tika-parser-bundles/tika-multimedia-bundle/ tika-parser-modules/ tika-parser-modules/tika-advanced-module/ tika-parser-modules/tika-advanced-parser-module/ tika-parser-modules/tika-advanced-parser-m...

Reply via email to