svn commit: r1203681 - in /tika/trunk/tika-parsers/src/test/resources/test-documents: testDITA.dita testDITA.ditamap testDITA2.dita

2011-11-18 Thread nick
Author: nick
Date: Fri Nov 18 15:01:07 2011
New Revision: 1203681

URL: http://svn.apache.org/viewvc?rev=1203681&view=rev
Log:
TIKA-784 Sample DITA task, concept and map files. (Based on some Alfresco 
documentation, with content replaced with Tika info)

Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita
tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap
tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita?rev=1203681&view=auto
==
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita 
(added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.dita Fri 
Nov 18 15:01:07 2011
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE task PUBLIC "-//OASIS//DTD DITA Task//EN" "task.dtd">
+<task id="apache-tika">
+<title>Apache Tika</title>
+<shortdesc>Apache Tika - a content analysis toolkit.</shortdesc>
+<prolog>
+<author>Apache Software Foundation</author>
+<copyright>
+<copyryear year="2011"/>
+<copyrholder>Apache Software Foundation</copyrholder>
+</copyright>
+<metadata>
+<audience experiencelevel="expert" job="Customizing" type="Coder"/>
+<category>Metadata</category>
+<keywords>
+<keyword>Tika</keyword>
+<keyword>Content</keyword>
+</keywords>
+<prodinfo>
+<prodname>Apache Tika</prodname>
+<vrmlist>
+<vrm version="1.x" release="Final" 
modification="2011/11/11"/>
+</vrmlist>
+</prodinfo>
+</metadata>
+</prolog>
+<taskbody>
+<context>
+<p>The Apache Tika toolkit detects and extracts metadata and 
structured text content from various documents using existing parser libraries. 
You can find the latest release on the download page. See the Getting Started 
guide for instructions on how to start using Tika.</p>
+
+<p>Tika is a project of the Apache Software Foundation, and was 
formerly a subproject of Apache Lucene.</p>
+</context>
+</taskbody>
+</task>

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap?rev=1203681&view=auto
==
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap 
(added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA.ditamap 
Fri Nov 18 15:01:07 2011
@@ -0,0 +1,23 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" 
"http://docs.oasis-open.org/dita/v1.1/OS/dtd/map.dtd">
+<map id="apache-tika" title="Apache Tika">
+<topicmeta>
+<author>Apache Tika</author>
+<copyright>
+<copyryear year="2011"/>
+<copyrholder>Apache Software Foundation</copyrholder>
+</copyright>
+<category>Version 1.x</category>
+<category>Tika</category>
+<category>Mime</category>
+<prodinfo>
+<prodname>Apache Tika</prodname>
+<vrmlist>
+<vrm version="1.x" release="Final" modification="2011/11/11"/>
+</vrmlist>
+</prodinfo>
+</topicmeta>
+<topicref href="testDITA.dita">
+<topicref href="testDITA2.dita" />
+</topicref>
+</map>

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita?rev=1203681&view=auto
==
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita 
(added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testDITA2.dita 
Fri Nov 18 15:01:07 2011
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" 
"http://docs.oasis-open.org/dita/v1.1/OS/dtd/concept.dtd">
+<concept id="tika-arch">
+ <title>Apache Tika Architecture</title>
+ <shortdesc>This section describes the Apache Tika architecture.</shortdesc>
+ <prolog>
+  <author>Apache Software Foundation</author>
+  <copyright>
+   <copyryear year="2011"/>
+   <copyrholder>Apache Software Foundation</copyrholder>
+  </copyright>
+  <metadata>
+  <audience experiencelevel="expert" job="Customizing" type="Coder"/>
+  <category>Metadata</category>
+  <keywords>
+  <keyword>Tika</keyword>
+  <keyword>Content</keyword>
+  </keywords>
+  <prodinfo>
+  <prodname>Apache Tika</prodname>
+  <vrmlist>
+  <vrm version="1.x" release="Final" modification="2011/11/11"/>
+  </vrmlist>
+  </prodinfo>
+  </metadata>
+ </prolog>
+ 

svn commit: r1203689 - in /tika/trunk: tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

2011-11-18 Thread nick
Author: nick
Date: Fri Nov 18 15:13:52 2011
New Revision: 1203689

URL: http://svn.apache.org/viewvc?rev=1203689&view=rev
Log:
TIKA-784 DITA mimetype entries for the 3 subtypes, plus tests

Modified:

tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml

tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1203689&r1=1203688&r2=1203689&view=diff
==
--- 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
(original)
+++ 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
Fri Nov 18 15:13:52 2011
@@ -72,8 +72,32 @@
 
   <mime-type type="application/dita+xml">
 <sub-class-of type="application/xml"/>
-<glob pattern="*.dita"/>
+<_comment>Darwin Information Typing Architecture</_comment>
+  </mime-type>
+
+  <mime-type type="application/dita+map+xml">
+<sub-class-of type="application/dita+xml"/>
+<_comment>DITA Map</_comment>
+<root-XML localName="map"/>
+<root-XML localName="map" 
namespaceURI="http://docs.oasis-open.org/namespace"/>
 <glob pattern="*.ditamap"/>
+  </mime-type>
+  <mime-type type="application/dita+topic+xml">
+<sub-class-of type="application/dita+xml"/>
+<_comment>DITA Topic</_comment>
+<root-XML localName="topic"/>
+<root-XML localName="topic" 
namespaceURI="http://docs.oasis-open.org/namespace"/>
+<root-XML localName="task"/>
+<root-XML localName="task" 
namespaceURI="http://docs.oasis-open.org/namespace"/>
+<root-XML localName="concept"/>
+<root-XML localName="concept" 
namespaceURI="http://docs.oasis-open.org/namespace"/>
+<glob pattern="*.dita"/>
+  </mime-type>
+  <mime-type type="application/dita+val+xml">
+<sub-class-of type="application/dita+xml"/>
+<_comment>DITA Conditional Processing Profile</_comment>
+<root-XML localName="val"/>
+<root-XML localName="val" 
namespaceURI="http://docs.oasis-open.org/namespace"/>
 <glob pattern="*.ditaval"/>
   </mime-type>
 

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1203689&r1=1203688&r2=1203689&view=diff
==
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
(original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
Fri Nov 18 15:13:52 2011
@@ -333,7 +333,7 @@ public class TestMimeTypes extends TestC
 assertTypeByName("application/postscript", "x.epsi");
 }
 
-public void testMicrosoftMultiMedia() throws Exception {
+public void testMicrosoftMultiMediaDetection() throws Exception {
assertTypeByName("video/x-ms-asf", "x.asf");
assertTypeByName("video/x-ms-wmv", "x.wmv");
assertTypeByName("audio/x-ms-wma", "x.wma");
@@ -342,6 +342,32 @@ public class TestMimeTypes extends TestC
assertTypeByData("video/x-ms-wmv", "testWMV.wmv");
assertTypeByData("audio/x-ms-wma", "testWMA.wma");
 }
+
+/**
+ * All 3 DITA types are in theory handled by the same mimetype,
+ *  but we specialise them 
+ */
+public void testDITADetection() throws Exception {
+   assertTypeByName("application/dita+topic+xml", "test.dita");
+   assertTypeByName("application/dita+map+xml", "test.ditamap");
+   assertTypeByName("application/dita+val+xml", "test.ditaval");
+   
+   assertTypeByData("application/dita+topic+xml", "testDITA.dita");
+   assertTypeByData("application/dita+topic+xml", "testDITA2.dita");
+   assertTypeByData("application/dita+map+xml", "testDITA.ditamap");
+   
+   assertTypeByNameAndData("application/dita+topic+xml", "testDITA.dita");
+   assertTypeByNameAndData("application/dita+topic+xml", "testDITA2.dita");
+   assertTypeByNameAndData("application/dita+map+xml", "testDITA.ditamap");
+   
+   // These are all children of the official type
+   assertEquals("application/dita+xml", 
+ 
repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA.dita")).toString());
+   assertEquals("application/dita+xml", 
+ 
repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA2.dita")).toString());
+   assertEquals("application/dita+xml", 
+ 
repo.getMediaTypeRegistry().getSupertype(getTypeByNameAndData("testDITA.ditamap")).toString());
+}
 
 /**
  * @since TIKA-194
@@ -499,15 +525,18 @@ public class TestMimeTypes extends TestC
 
 private void assertTypeByNameAndData(String expected, String filename)
throws IOException {
-   InputStream stream = TestMimeTypes.class.getResourceAsStream(
-   "/test-documents/" + filename);
-