TIKA-2064 Test Stata DTA files from Michael Stepner, plus detection unit test


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/e58ade38
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/e58ade38
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/e58ade38

Branch: refs/heads/2.x
Commit: e58ade381a3e4285eb81d55fb250611e82adbef7
Parents: 443a21e
Author: Nick Burch <n...@gagravarr.org>
Authored: Tue Sep 13 20:41:41 2016 +0100
Committer: Nick Burch <n...@gagravarr.org>
Committed: Tue Sep 13 20:48:11 2016 +0100

----------------------------------------------------------------------
 .../java/org/apache/tika/mime/TestMimeTypes.java    |  10 ++++++++++
 .../test/resources/test-documents/testStataDTA.dta  | Bin 0 -> 1207 bytes
 .../test/resources/test-documents/testStataDTA.txt  |  15 +++++++++++++++
 3 files changed, 25 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/e58ade38/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index d4840b7..756d744 100644
--- a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -1004,6 +1004,16 @@ public class TestMimeTypes extends TikaTest {
         assertTypeByData("application/x-endnote-refer", "testEndNoteImportFile.enw");
     }
 
+    @Test
+    public void testStataDTA() throws Exception {
+        // Filename only gives base type
+        assertTypeByName("application/x-stata-dta", "testStataDTA.dta");
+        // With data too, can get specific version
+        assertTypeByData("application/x-stata-dta; version=13", "testStataDTA.dta");
+        // Name + data gets specific version as well
+        assertType("application/x-stata-dta; version=13", "testStataDTA.dta");
+    }
+
     private void assertText(byte[] prefix) throws IOException {
         assertMagic("text/plain", prefix);
     }

http://git-wip-us.apache.org/repos/asf/tika/blob/e58ade38/tika-parsers/src/test/resources/test-documents/testStataDTA.dta
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testStataDTA.dta b/tika-parsers/src/test/resources/test-documents/testStataDTA.dta
new file mode 100644
index 0000000..92dd695
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testStataDTA.dta differ

http://git-wip-us.apache.org/repos/asf/tika/blob/e58ade38/tika-parsers/src/test/resources/test-documents/testStataDTA.txt
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testStataDTA.txt b/tika-parsers/src/test/resources/test-documents/testStataDTA.txt
new file mode 100644
index 0000000..7270623
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testStataDTA.txt
@@ -0,0 +1,15 @@
+testStataDTA.dta was created on Stata 13.1 running on Mac OS X, from:
+---------------------------------------------------------------------
+clear all
+set obs 3
+
+gen byte integers=_n
+gen double reals = sqrt(_n)
+
+gen fruits = ""
+replace fruits = "apple" in 1
+replace fruits = "banana" in 2
+replace fruits = "cantaloupe" in 3
+
+save stata_test_data.dta
+---------------------------------------------------------------------

Reply via email to