svn commit: r1203681 - in /tika/trunk/tika-parsers/src/test/resources/test-documents: testDITA.dita testDITA.ditamap testDITA2.dita
Author: nick Date: Fri Nov 18 15:01:07 2011 New Revision: 1203681 URL: http://svn.apache.org/viewvc?rev=1203681&view=rev Log: TIKA-784 Sample DITA task, concept and map files. (Based on some Alfresco documentation, with content replaced with Tika info) Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita?rev=1203681&view=auto == --- tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita (added) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita Fri Nov 18 15:01:07 2011 @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE task PUBLIC "-//OASIS//DTD DITA Task//EN" "task.dtd"> +<task id="apache-tika"> +<title>Apache Tika</title> +<shortdesc>Apache Tika - a content analysis toolkit.</shortdesc> +<prolog> +<author>Apache Software Foundation</author> +<copyright> +<copyryear year="2011"/> +<copyrholder>Apache Software Foundation</copyrholder> +</copyright> +<metadata> +<audience experiencelevel="expert" job="Customizing" type="Coder"/> +<category>Metadata</category> +<keywords> +<keyword>Tika</keyword> +<keyword>Content</keyword> +</keywords> +<prodinfo> +<prodname>Apache Tika</prodname> +<vrmlist> +<vrm version="1.x" release="Final" modification="2011/11/11"/> +</vrmlist> +</prodinfo> +</metadata> +</prolog> +<taskbody> +<context> +<p>The Apache Tika toolkit detects and extracts metadata and structured text content from various documents using existing parser libraries. You can find the latest release on the download page. 
See the Getting Started guide for instructions on how to start using Tika.</p> + +<p>Tika is a project of the Apache Software Foundation, and was formerly a subproject of Apache Lucene.</p> +</context> +</taskbody> +</task> Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap?rev=1203681&view=auto == --- tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap (added) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap Fri Nov 18 15:01:07 2011 @@ -0,0 +1,23 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "http://docs.oasis-open.org/dita/v1.1/OS/dtd/map.dtd"> +<map id="apache-tika" title="Apache Tika"> +<topicmeta> +<author>Apache Tika</author> +<copyright> +<copyryear year="2011"/> +<copyrholder>Apache Software Foundation</copyrholder> +</copyright> +<category>Version 1.x</category> +<category>Tika</category> +<category>Mime</category> +<prodinfo> +<prodname>Apache Tika</prodname> +<vrmlist> +<vrm version="1.x" release="Final" modification="2011/11/11"/> +</vrmlist> +</prodinfo> +</topicmeta> +<topicref href="testDITA.dita"> +<topicref href="testDITA2.dita" /> +</topicref> +</map> Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita?rev=1203681&view=auto == --- tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita (added) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita Fri Nov 18 15:01:07 2011 @@ -0,0 +1,33 @@ +<?xml version="1.0" encoding="UTF-8"?> 
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "http://docs.oasis-open.org/dita/v1.1/OS/dtd/concept.dtd"> +<concept id="tika-arch"> + <title>Apache Tika Architecture</title> + <shortdesc>This section describes the Apache Tika architecture.</shortdesc> + <prolog> + <author>Apache Software Foundation</author> + <copyright> + <copyryear year="2011"/> + <copyrholder>Apache Software Foundation</copyrholder> + </copyright> + <metadata> + <audience experiencelevel="expert" job="Customizing" type="Coder"/> + <category>Metadata</category> + <keywords> + <keyword>Tika</keyword> + <keyword>Content</keyword> + </keywords> + <prodinfo> + <prodname>Apache Tika</prodname> + <vrmlist> + <vrm version="1.x" release="Final" modification="2011/11/11"/> + </vrmlist> + </prodinfo> + </metadata> + </prolog> +
svn commit: r1203689 - in /tika/trunk: tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Author: nick Date: Fri Nov 18 15:13:52 2011 New Revision: 1203689 URL: http://svn.apache.org/viewvc?rev=1203689&view=rev Log: TIKA-784 DITA mimetype entries for the 3 subtypes, plus tests Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1203689&r1=1203688&r2=1203689&view=diff == --- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original) +++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Fri Nov 18 15:13:52 2011 @@ -72,8 +72,32 @@ <mime-type type="application/dita+xml"> <sub-class-of type="application/xml"/> -<glob pattern="*.dita"/> +<_comment>Darwin Information Typing Architecture</_comment> + </mime-type> + + <mime-type type="application/dita+map+xml"> +<sub-class-of type="application/dita+xml"/> +<_comment>DITA Map</_comment> +<root-XML localName="map"/> +<root-XML localName="map" namespaceURI="http://docs.oasis-open.org/namespace"/> <glob pattern="*.ditamap"/> + </mime-type> + <mime-type type="application/dita+topic+xml"> +<sub-class-of type="application/dita+xml"/> +<_comment>DITA Topic</_comment> +<root-XML localName="topic"/> +<root-XML localName="topic" namespaceURI="http://docs.oasis-open.org/namespace"/> +<root-XML localName="task"/> +<root-XML localName="task" namespaceURI="http://docs.oasis-open.org/namespace"/> +<root-XML localName="concept"/> +<root-XML localName="concept" namespaceURI="http://docs.oasis-open.org/namespace"/> +<glob pattern="*.dita"/> + </mime-type> + <mime-type type="application/dita+val+xml"> +<sub-class-of type="application/dita+xml"/> +<_comment>DITA Conditional Processing Profile</_comment> +<root-XML localName="val"/> +<root-XML localName="val" namespaceURI="http://docs.oasis-open.org/namespace"/> <glob pattern="*.ditaval"/> </mime-type> Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1203689&r1=1203688&r2=1203689&view=diff == --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Fri Nov 18 15:13:52 2011 @@ -333,7 +333,7 @@ public class TestMimeTypes extends TestC assertTypeByName("application/postscript", "x.epsi"); } -public void testMicrosoftMultiMedia() throws Exception { +public void testMicrosoftMultiMediaDetection() throws Exception { assertTypeByName("video/x-ms-asf", "x.asf"); assertTypeByName("video/x-ms-wmv", "x.wmv"); assertTypeByName("audio/x-ms-wma", "x.wma"); @@ -342,6 +342,32 @@ public class TestMimeTypes extends TestC assertTypeByData("video/x-ms-wmv", "testWMV.wmv"); assertTypeByData("audio/x-ms-wma", "testWMA.wma"); } + +/** + * All 3 DITA types are in theory handled by the same mimetype, + * but we specialise them + */ +public void testDITADetection() throws Exception { + assertTypeByName("application/dita+topic+xml", "test.dita"); + assertTypeByName("application/dita+map+xml", "test.ditamap"); + assertTypeByName("application/dita+val+xml", "test.ditaval"); + + assertTypeByData("application/dita+topic+xml", "testDITA.dita"); + assertTypeByData("application/dita+topic+xml", "testDITA2.dita"); + assertTypeByData("application/dita+map+xml", "testDITA.ditamap"); + + assertTypeByNameAndData("application/dita+topic+xml", "testDITA.dita"); + assertTypeByNameAndData("application/dita+topic+xml", "testDITA2.dita"); + assertTypeByNameAndData("application/dita+map+xml", "testDITA.ditamap"); + + // These are all children of the official type + assertEquals("application/dita+xml", + repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA.dita")).toString()); + assertEquals("application/dita+xml", + 
repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA2.dita")).toString()); + assertEquals("application/dita+xml", + repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA.ditamap")).toString()); +} /** * @since TIKA-194 @@ -499,15 +525,18 @@ public class TestMimeTypes extends TestC private void assertTypeByNameAndData(String expected, String filename) throws IOException { - InputStream stream = TestMimeTypes.class.getResourceAsStream( - "/test-documents/" + filename); -