Author: mattmann
Date: Sat Feb 28 17:30:35 2015
New Revision: 1662970

URL: http://svn.apache.org/r1662970
Log:
Fix for TIKA-1561 GCMD Directory Interchange Format (.dif) identification 
contributed by LukeLiush <[email protected]>. This closes #32.

Added:
    
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
    
tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif
    
tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
Modified:
    tika/trunk/CHANGES.txt
    
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
    
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java

Modified: tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1662970&r1=1662969&r2=1662970&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sat Feb 28 17:30:35 2015
@@ -1,5 +1,8 @@
 Release 1.8 - Current Development
 
+  * Detect Global Change Master Directory (GCMD) Directory
+    Interchange Format (DIF) files (TIKA-1561).
+
   * Parsers and other services can now be disabled with a
     blacklist META-INF file (TIKA-1558).
 

Modified: 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1662970&r1=1662969&r2=1662970&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
(original)
+++ 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
Sat Feb 28 17:30:35 2015
@@ -5017,12 +5017,26 @@
   <mime-type type="multipart/signed"/>
   <mime-type type="multipart/voice-message"/>
 
+  <mime-type type="text/dif+xml">
+    <root-XML localName="DIF"/>
+    <root-XML localName="DIF" namespaceURI="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/"/>
+    <glob pattern="*.dif"/>
+    <sub-class-of type="application/xml"/>
+  </mime-type>
+
   <mime-type type="text/x-actionscript">
     <_comment>ActionScript source code</_comment>
     <glob pattern="*.as"/>
     <sub-class-of type="text/plain"/>
   </mime-type>
 
+  <mime-type type="text/dif+xml">
+    <root-XML localName="DIF"/>
+    <root-XML localName="DIF" namespaceURI="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/"/>
+    <glob pattern="*.dif"/>
+    <sub-class-of type="application/xml"/>
+  </mime-type>
+
   <mime-type type="text/x-ada">
     <_comment>Ada source code</_comment>
     <glob pattern="*.ada"/>

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java?rev=1662970&r1=1662969&r2=1662970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java 
(original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java 
Sat Feb 28 17:30:35 2015
@@ -845,7 +845,7 @@ public class TikaDetectionTest {
         assertEquals("application/x-grib", tika.detect("x.grb"));
         assertEquals("application/x-grib", tika.detect("x.grb1"));
         assertEquals("application/x-grib", tika.detect("x.grb2"));
-        
+        assertEquals("text/dif+xml", tika.detect("x.dif"));
     }
 
 }

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java?rev=1662970&r1=1662969&r2=1662970&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java 
(original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java 
Sat Feb 28 17:30:35 2015
@@ -74,6 +74,9 @@ public class MimeDetectionTest {
         testFile("image/cgm", "plotutils-bin-cgm-v3.cgm");
         // test HTML detection of malformed file, previously identified as 
image/cgm (TIKA-1170)
         testFile("text/html", "test-malformed-header.html.bin");
+        
+        //test GCMD Directory Interchange Format (.dif) TIKA-1561
+        testFile("text/dif+xml", "brwNIMS_2014.dif");
     }
 
     @Test

Added: 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif?rev=1662970&view=auto
==============================================================================
--- 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif 
(added)
+++ 
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif 
Sat Feb 28 17:30:35 2015
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+        <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
+          <Entry_ID>02a6301c-3ab3-11e4-8ee7-00c0f03d5b7c</Entry_ID>
+          <Entry_Title>Barrow Logger Data NIMS 2014</Entry_Title>
+
+          <Parameters>
+            <Category>EARTH SCIENCE</Category>
+            <Topic>BIOSPHERE</Topic>
+            <Term>ECOLOGICAL DYNAMICS</Term>
+          </Parameters>
+
+
+          <Spatial_Coverage>
+            <Southernmost_Latitude>70</Southernmost_Latitude>
+            <Northernmost_Latitude>72</Northernmost_Latitude>
+            <Westernmost_Longitude>-162</Westernmost_Longitude>
+            <Easternmost_Longitude>-150</Easternmost_Longitude>
+          </Spatial_Coverage>
+
+          <Data_Center>
+            <Data_Center_Name>
+              <Short_Name>ACADIS</Short_Name>
+              <Long_Name>Advanced Cooperative Arctic Data and Information 
Service</Long_Name>
+            </Data_Center_Name>
+            <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
+            <Personnel>
+              <Role>DATA CENTER CONTACT</Role>
+              <First_Name>ACADIS</First_Name>
+              <Last_Name>User Services</Last_Name>
+              <Contact_Address>
+                <Address>NCAR/CISL</Address>
+                <Address>P.O. Box 3000</Address>
+                <City>Boulder</City>
+                <Province_or_State>CO</Province_or_State>
+                <Postal_Code>80307</Postal_Code>
+                <Country>USA</Country>
+              </Contact_Address>
+            </Personnel>
+          </Data_Center>
+
+          <Summary>
+            <Abstract>Logger records from the Networked Info-mechanical 
Systems (NIMS), Transect length: ~50m The data was recorded using a CR3000 
logger. The sensor trolley was equipped with instruments for recording the 
distance to vegetation canopy (SR50a Sonic Distance, Campbell Scientific), up- 
and downwelling short- and longwave radiation (CNR4 net radiometer, Kipp &amp; 
Zonen), air temperature and surface temperature (SI-111 IR radiometer, Apogee 
Instruments Inc.) and spectral reflection (Jaz Combo-2, Ocean Optics; 
GreenSeeker RT100 (505), NTech).</Abstract>
+          </Summary>
+
+          <Related_URL>
+            <URL_Content_Type>
+              <Type>GET DATA</Type>
+            </URL_Content_Type>
+            
<URL>http://www.aoncadis.org/dataset/id/02a6301c-3ab3-11e4-8ee7-00c0f03d5b7c.html</URL>
+            <Description>Data Center top-level access page for this 
resource</Description>
+          </Related_URL>
+
+          <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
+          <Metadata_Version>9.8.4</Metadata_Version>
+          <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
+        </DIF>

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif?rev=1662970&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif
 Sat Feb 28 17:30:35 2015
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+        <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
+          <Entry_ID>0091cf0c-7ad3-11e2-851e-00c0f03d5b7c</Entry_ID>
+          <Entry_Title>Active Layer ARCSS grid Barrow, Alaska 
2012</Entry_Title>
+
+          <Parameters>
+            <Category>EARTH SCIENCE</Category>
+            <Topic>CRYOSPHERE</Topic>
+            <Term>FROZEN GROUND</Term>
+            <Variable_Level_1>ACTIVE LAYER</Variable_Level_1>
+          </Parameters>
+
+          <Temporal_Coverage>
+            <Start_Date>2012-06-09</Start_Date>
+            <Stop_Date>2012-08-18</Stop_Date>
+          </Temporal_Coverage>
+
+          <Spatial_Coverage>
+            <Southernmost_Latitude>71</Southernmost_Latitude>
+            <Northernmost_Latitude>71.5</Northernmost_Latitude>
+            <Westernmost_Longitude>-156.6</Westernmost_Longitude>
+            <Easternmost_Longitude>-156.5</Easternmost_Longitude>
+          </Spatial_Coverage>
+
+          <Data_Center>
+            <Data_Center_Name>
+              <Short_Name>ACADIS</Short_Name>
+              <Long_Name>Advanced Cooperative Arctic Data and Information 
Service</Long_Name>
+            </Data_Center_Name>
+            <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
+            <Personnel>
+              <Role>DATA CENTER CONTACT</Role>
+              <First_Name>ACADIS</First_Name>
+              <Last_Name>User Services</Last_Name>
+              <Contact_Address>
+                <Address>NCAR/CISL</Address>
+                <Address>P.O. Box 3000</Address>
+                <City>Boulder</City>
+                <Province_or_State>CO</Province_or_State>
+                <Postal_Code>80307</Postal_Code>
+                <Country>USA</Country>
+              </Contact_Address>
+            </Personnel>
+          </Data_Center>
+
+          <Summary>
+            <Abstract>Active Layer measurements were taken on a 30 plot subset 
within the Arctic System Science (ARCSS) Grid in Barrow, Alaska. Each 
measurement was taken on the north eastern-most corner of each plot. The chosen 
subset was located from D2-D7 and H2-H7. The Systems Ecology Lab (SEL) 
lab&apos;s Circumpolar Active Layer Monitoring (CALM) depth probe was used. 
Depth was measured on the probe as the distance from the frozen active layer to 
the top of the surface of the vegetation. If water was present, then it was 
measured to the top of the biomass.</Abstract>
+          </Summary>
+
+          <Related_URL>
+            <URL_Content_Type>
+              <Type>GET DATA</Type>
+            </URL_Content_Type>
+            
<URL>http://www.aoncadis.org/dataset/id/0091cf0c-7ad3-11e2-851e-00c0f03d5b7c.html</URL>
+            <Description>Data Center top-level access page for this 
resource</Description>
+          </Related_URL>
+
+          <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
+          <Metadata_Version>9.8.4</Metadata_Version>
+          <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
+        </DIF>

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif?rev=1662970&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
 Sat Feb 28 17:30:35 2015
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+        <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
+          <Entry_ID>005f3222-7548-11e2-851e-00c0f03d5b7c</Entry_ID>
+          <Entry_Title>Carbon Isotopic Values of Alkanes Extracted from 
Paleosols</Entry_Title>
+
+          <Parameters>
+            <Category>EARTH SCIENCE</Category>
+            <Topic>PALEOCLIMATE</Topic>
+            <Term>LAND RECORDS</Term>
+            <Variable_Level_1>PALEOSOLS</Variable_Level_1>
+          </Parameters>
+          <Parameters>
+            <Category>EARTH SCIENCE</Category>
+            <Topic>LAND SURFACE</Topic>
+            <Term>SOILS</Term>
+            <Variable_Level_1>CARBON</Variable_Level_1>
+          </Parameters>
+          <Parameters>
+            <Category>EARTH SCIENCE</Category>
+            <Topic>PALEOCLIMATE</Topic>
+            <Term>LAND RECORDS</Term>
+            <Variable_Level_1>ISOTOPES</Variable_Level_1>
+          </Parameters>
+          <Parameters>
+            <Category>EARTH SCIENCE</Category>
+            <Topic>BIOSPHERE</Topic>
+            <Term>ECOLOGICAL DYNAMICS</Term>
+            <Variable_Level_1>ECOSYSTEM FUNCTIONS</Variable_Level_1>
+            <Variable_Level_2>BIOGEOCHEMICAL CYCLES</Variable_Level_2>
+          </Parameters>
+          <Parameters>
+            <Category>EARTH SCIENCE</Category>
+            <Topic>SOLID EARTH</Topic>
+            <Term>GEOCHEMISTRY</Term>
+            <Variable_Level_1>BIOGEOCHEMICAL PROCESSES</Variable_Level_1>
+          </Parameters>
+
+
+          <Spatial_Coverage>
+            <Southernmost_Latitude>66.56</Southernmost_Latitude>
+            <Northernmost_Latitude>90</Northernmost_Latitude>
+            <Westernmost_Longitude>-180</Westernmost_Longitude>
+            <Easternmost_Longitude>180</Easternmost_Longitude>
+          </Spatial_Coverage>
+
+          <Data_Center>
+            <Data_Center_Name>
+              <Short_Name>ACADIS</Short_Name>
+              <Long_Name>Advanced Cooperative Arctic Data and Information 
Service</Long_Name>
+            </Data_Center_Name>
+            <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
+            <Personnel>
+              <Role>DATA CENTER CONTACT</Role>
+              <First_Name>ACADIS</First_Name>
+              <Last_Name>User Services</Last_Name>
+              <Contact_Address>
+                <Address>NCAR/CISL</Address>
+                <Address>P.O. Box 3000</Address>
+                <City>Boulder</City>
+                <Province_or_State>CO</Province_or_State>
+                <Postal_Code>80307</Postal_Code>
+                <Country>USA</Country>
+              </Contact_Address>
+            </Personnel>
+          </Data_Center>
+
+          <Summary>
+            <Abstract>Dataset consists of compound specific carbon isotopic 
values of alkanes
+extracted from paleosols. Values represent the mean of duplicate
+measurements.</Abstract>
+          </Summary>
+
+          <Related_URL>
+            <URL_Content_Type>
+              <Type>GET DATA</Type>
+            </URL_Content_Type>
+            
<URL>http://www.aoncadis.org/dataset/id/005f3222-7548-11e2-851e-00c0f03d5b7c.html</URL>
+            <Description>Data Center top-level access page for this 
resource</Description>
+          </Related_URL>
+
+          <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
+          <Metadata_Version>9.8.4</Metadata_Version>
+          <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
+        </DIF>


Reply via email to