TIKA-2064 Test Stata DTA files from Michael Stepner, plus detection unit test
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/e58ade38 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/e58ade38 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/e58ade38 Branch: refs/heads/2.x Commit: e58ade381a3e4285eb81d55fb250611e82adbef7 Parents: 443a21e Author: Nick Burch <n...@gagravarr.org> Authored: Tue Sep 13 20:41:41 2016 +0100 Committer: Nick Burch <n...@gagravarr.org> Committed: Tue Sep 13 20:48:11 2016 +0100 ---------------------------------------------------------------------- .../java/org/apache/tika/mime/TestMimeTypes.java | 10 ++++++++++ .../test/resources/test-documents/testStataDTA.dta | Bin 0 -> 1207 bytes .../test/resources/test-documents/testStataDTA.txt | 15 +++++++++++++++ 3 files changed, 25 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/e58ade38/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java ---------------------------------------------------------------------- diff --git a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java index d4840b7..756d744 100644 --- a/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java +++ b/tika-app/src/test/java/org/apache/tika/mime/TestMimeTypes.java @@ -1004,6 +1004,16 @@ public class TestMimeTypes extends TikaTest { assertTypeByData("application/x-endnote-refer", "testEndNoteImportFile.enw"); } + @Test + public void testStataDTA() throws Exception { + // Filename only gives base type + assertTypeByName("application/x-stata-dta", "testStataDTA.dta"); + // With data too, can get specific version + assertTypeByData("application/x-stata-dta; version=13", "testStataDTA.dta"); + // Name + data gets specific version as well + assertType("application/x-stata-dta; version=13", "testStataDTA.dta"); + } + private void assertText(byte[] prefix) throws IOException { assertMagic("text/plain", prefix); } http://git-wip-us.apache.org/repos/asf/tika/blob/e58ade38/tika-parsers/src/test/resources/test-documents/testStataDTA.dta ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/resources/test-documents/testStataDTA.dta b/tika-parsers/src/test/resources/test-documents/testStataDTA.dta new file mode 100644 index 0000000..92dd695 Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testStataDTA.dta differ http://git-wip-us.apache.org/repos/asf/tika/blob/e58ade38/tika-parsers/src/test/resources/test-documents/testStataDTA.txt ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/resources/test-documents/testStataDTA.txt b/tika-parsers/src/test/resources/test-documents/testStataDTA.txt new file mode 100644 index 0000000..7270623 --- /dev/null +++ b/tika-parsers/src/test/resources/test-documents/testStataDTA.txt @@ -0,0 +1,15 @@ +testStataDTA.dta was created on Stata 13.1 running on Mac OS X, from: +--------------------------------------------------------------------- +clear all +set obs 3 + +gen byte integers=_n +gen double reals = sqrt(_n) + +gen fruits = "" +replace fruits = "apple" in 1 +replace fruits = "banana" in 2 +replace fruits = "cantaloupe" in 3 + +save stata_test_data.dta +---------------------------------------------------------------------