Author: mattmann
Date: Sat Feb 28 17:30:35 2015
New Revision: 1662970
URL: http://svn.apache.org/r1662970
Log:
Fix for TIKA-1561 GCMD Directory Interchange Format (.dif) identification
contributed by LukeLiush <[email protected]>. This closes #32.
Added:
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif
tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1662970&r1=1662969&r2=1662970&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sat Feb 28 17:30:35 2015
@@ -1,5 +1,8 @@
Release 1.8 - Current Development
+ * Detect Global Change Master Directory (GCMD) Directory
+ Interchange Format (DIF) files (TIKA-1561).
+
* Parsers and other services can now be disabled with a
blacklist META-INF file (TIKA-1558).
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1662970&r1=1662969&r2=1662970&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
(original)
+++
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
Sat Feb 28 17:30:35 2015
@@ -5017,12 +5017,26 @@
<mime-type type="multipart/signed"/>
<mime-type type="multipart/voice-message"/>
+ <mime-type type="text/dif+xml">
+ <root-XML localName="DIF"/>
+ <root-XML localName="DIF"
namespaceURI="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/"/>
+ <glob pattern="*.dif"/>
+ <sub-class-of type="application/xml"/>
+ </mime-type>
+
<mime-type type="text/x-actionscript">
<_comment>ActionScript source code</_comment>
<glob pattern="*.as"/>
<sub-class-of type="text/plain"/>
</mime-type>
+ <mime-type type="text/dif+xml">
+ <root-XML localName="DIF"/>
+ <root-XML localName="DIF"
namespaceURI="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/"/>
+ <glob pattern="*.dif"/>
+ <sub-class-of type="application/xml"/>
+ </mime-type>
+
<mime-type type="text/x-ada">
<_comment>Ada source code</_comment>
<glob pattern="*.ada"/>
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java?rev=1662970&r1=1662969&r2=1662970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
(original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
Sat Feb 28 17:30:35 2015
@@ -845,7 +845,7 @@ public class TikaDetectionTest {
assertEquals("application/x-grib", tika.detect("x.grb"));
assertEquals("application/x-grib", tika.detect("x.grb1"));
assertEquals("application/x-grib", tika.detect("x.grb2"));
-
+ assertEquals("text/dif+xml", tika.detect("x.dif"));
}
}
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java?rev=1662970&r1=1662969&r2=1662970&view=diff
==============================================================================
---
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
(original)
+++
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
Sat Feb 28 17:30:35 2015
@@ -74,6 +74,9 @@ public class MimeDetectionTest {
testFile("image/cgm", "plotutils-bin-cgm-v3.cgm");
// test HTML detection of malformed file, previously identified as image/cgm (TIKA-1170)
testFile("text/html", "test-malformed-header.html.bin");
+
+ //test GCMD Directory Interchange Format (.dif) TIKA-1561
+ testFile("text/dif+xml", "brwNIMS_2014.dif");
}
@Test
Added:
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif?rev=1662970&view=auto
==============================================================================
---
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
(added)
+++
tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/brwNIMS_2014.dif
Sat Feb 28 17:30:35 2015
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+ <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/
http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
+ <Entry_ID>02a6301c-3ab3-11e4-8ee7-00c0f03d5b7c</Entry_ID>
+ <Entry_Title>Barrow Logger Data NIMS 2014</Entry_Title>
+
+ <Parameters>
+ <Category>EARTH SCIENCE</Category>
+ <Topic>BIOSPHERE</Topic>
+ <Term>ECOLOGICAL DYNAMICS</Term>
+ </Parameters>
+
+
+ <Spatial_Coverage>
+ <Southernmost_Latitude>70</Southernmost_Latitude>
+ <Northernmost_Latitude>72</Northernmost_Latitude>
+ <Westernmost_Longitude>-162</Westernmost_Longitude>
+ <Easternmost_Longitude>-150</Easternmost_Longitude>
+ </Spatial_Coverage>
+
+ <Data_Center>
+ <Data_Center_Name>
+ <Short_Name>ACADIS</Short_Name>
+ <Long_Name>Advanced Cooperative Arctic Data and Information
Service</Long_Name>
+ </Data_Center_Name>
+ <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
+ <Personnel>
+ <Role>DATA CENTER CONTACT</Role>
+ <First_Name>ACADIS</First_Name>
+ <Last_Name>User Services</Last_Name>
+ <Contact_Address>
+ <Address>NCAR/CISL</Address>
+ <Address>P.O. Box 3000</Address>
+ <City>Boulder</City>
+ <Province_or_State>CO</Province_or_State>
+ <Postal_Code>80307</Postal_Code>
+ <Country>USA</Country>
+ </Contact_Address>
+ </Personnel>
+ </Data_Center>
+
+ <Summary>
+ <Abstract>Logger records from the Networked Info-mechanical
Systems (NIMS), Transect length: ~50m The data was recorded using a CR3000
logger. The sensor trolley was equipped with instruments for recording the
distance to vegetation canopy (SR50a Sonic Distance, Campbell Scientific), up-
and downwelling short- and longwave radiation (CNR4 net radiometer, Kipp &amp;
Zonen), air temperature and surface temperature (SI-111 IR radiometer, Apogee
Instruments Inc.) and spectral reflection (Jaz Combo-2, Ocean Optics;
GreenSeeker RT100 (505), NTech).</Abstract>
+ </Summary>
+
+ <Related_URL>
+ <URL_Content_Type>
+ <Type>GET DATA</Type>
+ </URL_Content_Type>
+
<URL>http://www.aoncadis.org/dataset/id/02a6301c-3ab3-11e4-8ee7-00c0f03d5b7c.html</URL>
+ <Description>Data Center top-level access page for this
resource</Description>
+ </Related_URL>
+
+ <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
+ <Metadata_Version>9.8.4</Metadata_Version>
+ <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
+ </DIF>
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif?rev=1662970&view=auto
==============================================================================
---
tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif
(added)
+++
tika/trunk/tika-parsers/src/test/resources/test-documents/active_layer_arcss_grid_barrow_alaska_2012.dif
Sat Feb 28 17:30:35 2015
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+ <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/
http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
+ <Entry_ID>0091cf0c-7ad3-11e2-851e-00c0f03d5b7c</Entry_ID>
+ <Entry_Title>Active Layer ARCSS grid Barrow, Alaska
2012</Entry_Title>
+
+ <Parameters>
+ <Category>EARTH SCIENCE</Category>
+ <Topic>CRYOSPHERE</Topic>
+ <Term>FROZEN GROUND</Term>
+ <Variable_Level_1>ACTIVE LAYER</Variable_Level_1>
+ </Parameters>
+
+ <Temporal_Coverage>
+ <Start_Date>2012-06-09</Start_Date>
+ <Stop_Date>2012-08-18</Stop_Date>
+ </Temporal_Coverage>
+
+ <Spatial_Coverage>
+ <Southernmost_Latitude>71</Southernmost_Latitude>
+ <Northernmost_Latitude>71.5</Northernmost_Latitude>
+ <Westernmost_Longitude>-156.6</Westernmost_Longitude>
+ <Easternmost_Longitude>-156.5</Easternmost_Longitude>
+ </Spatial_Coverage>
+
+ <Data_Center>
+ <Data_Center_Name>
+ <Short_Name>ACADIS</Short_Name>
+ <Long_Name>Advanced Cooperative Arctic Data and Information
Service</Long_Name>
+ </Data_Center_Name>
+ <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
+ <Personnel>
+ <Role>DATA CENTER CONTACT</Role>
+ <First_Name>ACADIS</First_Name>
+ <Last_Name>User Services</Last_Name>
+ <Contact_Address>
+ <Address>NCAR/CISL</Address>
+ <Address>P.O. Box 3000</Address>
+ <City>Boulder</City>
+ <Province_or_State>CO</Province_or_State>
+ <Postal_Code>80307</Postal_Code>
+ <Country>USA</Country>
+ </Contact_Address>
+ </Personnel>
+ </Data_Center>
+
+ <Summary>
+ <Abstract>Active Layer measurements were taken on a 30 plot subset
within the Arctic System Science (ARCSS) Grid in Barrow, Alaska. Each
measurement was taken on the north eastern-most corner of each plot. The chosen
subset was located from D2-D7 and H2-H7. The Systems Ecology Lab (SEL)
lab's Circumpolar Active Layer Monitoring (CALM) depth probe was used.
Depth was measured on the probe as the distance from the frozen active layer to
the top of the surface of the vegetation. If water was present, then it was
measured to the top of the biomass.</Abstract>
+ </Summary>
+
+ <Related_URL>
+ <URL_Content_Type>
+ <Type>GET DATA</Type>
+ </URL_Content_Type>
+
<URL>http://www.aoncadis.org/dataset/id/0091cf0c-7ad3-11e2-851e-00c0f03d5b7c.html</URL>
+ <Description>Data Center top-level access page for this
resource</Description>
+ </Related_URL>
+
+ <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
+ <Metadata_Version>9.8.4</Metadata_Version>
+ <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
+ </DIF>
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif?rev=1662970&view=auto
==============================================================================
---
tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
(added)
+++
tika/trunk/tika-parsers/src/test/resources/test-documents/carbon_isotopic_values_of_alkanes_extracted_from_paleosols.dif
Sat Feb 28 17:30:35 2015
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+ <DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/
http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.8.4.xsd">
+ <Entry_ID>005f3222-7548-11e2-851e-00c0f03d5b7c</Entry_ID>
+ <Entry_Title>Carbon Isotopic Values of Alkanes Extracted from
Paleosols</Entry_Title>
+
+ <Parameters>
+ <Category>EARTH SCIENCE</Category>
+ <Topic>PALEOCLIMATE</Topic>
+ <Term>LAND RECORDS</Term>
+ <Variable_Level_1>PALEOSOLS</Variable_Level_1>
+ </Parameters>
+ <Parameters>
+ <Category>EARTH SCIENCE</Category>
+ <Topic>LAND SURFACE</Topic>
+ <Term>SOILS</Term>
+ <Variable_Level_1>CARBON</Variable_Level_1>
+ </Parameters>
+ <Parameters>
+ <Category>EARTH SCIENCE</Category>
+ <Topic>PALEOCLIMATE</Topic>
+ <Term>LAND RECORDS</Term>
+ <Variable_Level_1>ISOTOPES</Variable_Level_1>
+ </Parameters>
+ <Parameters>
+ <Category>EARTH SCIENCE</Category>
+ <Topic>BIOSPHERE</Topic>
+ <Term>ECOLOGICAL DYNAMICS</Term>
+ <Variable_Level_1>ECOSYSTEM FUNCTIONS</Variable_Level_1>
+ <Variable_Level_2>BIOGEOCHEMICAL CYCLES</Variable_Level_2>
+ </Parameters>
+ <Parameters>
+ <Category>EARTH SCIENCE</Category>
+ <Topic>SOLID EARTH</Topic>
+ <Term>GEOCHEMISTRY</Term>
+ <Variable_Level_1>BIOGEOCHEMICAL PROCESSES</Variable_Level_1>
+ </Parameters>
+
+
+ <Spatial_Coverage>
+ <Southernmost_Latitude>66.56</Southernmost_Latitude>
+ <Northernmost_Latitude>90</Northernmost_Latitude>
+ <Westernmost_Longitude>-180</Westernmost_Longitude>
+ <Easternmost_Longitude>180</Easternmost_Longitude>
+ </Spatial_Coverage>
+
+ <Data_Center>
+ <Data_Center_Name>
+ <Short_Name>ACADIS</Short_Name>
+ <Long_Name>Advanced Cooperative Arctic Data and Information
Service</Long_Name>
+ </Data_Center_Name>
+ <Data_Center_URL>http://www.aoncadis.org/</Data_Center_URL>
+ <Personnel>
+ <Role>DATA CENTER CONTACT</Role>
+ <First_Name>ACADIS</First_Name>
+ <Last_Name>User Services</Last_Name>
+ <Contact_Address>
+ <Address>NCAR/CISL</Address>
+ <Address>P.O. Box 3000</Address>
+ <City>Boulder</City>
+ <Province_or_State>CO</Province_or_State>
+ <Postal_Code>80307</Postal_Code>
+ <Country>USA</Country>
+ </Contact_Address>
+ </Personnel>
+ </Data_Center>
+
+ <Summary>
+ <Abstract>Dataset consists of compound specific carbon isotopic
values of alkanes
+extracted from paleosols. Values represent the mean of duplicate
+measurements.</Abstract>
+ </Summary>
+
+ <Related_URL>
+ <URL_Content_Type>
+ <Type>GET DATA</Type>
+ </URL_Content_Type>
+
<URL>http://www.aoncadis.org/dataset/id/005f3222-7548-11e2-851e-00c0f03d5b7c.html</URL>
+ <Description>Data Center top-level access page for this
resource</Description>
+ </Related_URL>
+
+ <Metadata_Name>ACADIS IDN DIF</Metadata_Name>
+ <Metadata_Version>9.8.4</Metadata_Version>
+ <Last_DIF_Revision_Date>2015-02-05</Last_DIF_Revision_Date>
+ </DIF>