Merge branch 'TIKA-1872' of https://github.com/trevorlewis/tika into TIKA-1872
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/9cdfc4ae Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/9cdfc4ae Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/9cdfc4ae Branch: refs/heads/master Commit: 9cdfc4ae23fe9be3b62c827d3771f7bd713fcc63 Parents: c94236a eafe280 Author: Chris Mattmann <[email protected]> Authored: Wed Mar 30 13:40:26 2016 -0700 Committer: Chris Mattmann <[email protected]> Committed: Wed Mar 30 13:40:26 2016 -0700 ---------------------------------------------------------------------- pom.xml | 1 + tika-app/pom.xml | 5 + .../main/java/org/apache/tika/cli/TikaCLI.java | 41 +- .../java/org/apache/tika/cli/TikaCLITest.java | 16 - .../tika/language/LanguageIdentifier.java | 240 -- .../apache/tika/language/LanguageProfile.java | 314 --- .../tika/language/LanguageProfilerBuilder.java | 770 ------ .../apache/tika/language/ProfilingHandler.java | 67 - .../apache/tika/language/ProfilingWriter.java | 103 - .../language/detect/LanguageConfidence.java | 25 + .../tika/language/detect/LanguageDetector.java | 239 ++ .../tika/language/detect/LanguageHandler.java | 66 + .../tika/language/detect/LanguageNames.java | 86 + .../tika/language/detect/LanguageResult.java | 98 + .../tika/language/detect/LanguageWriter.java | 78 + .../org/apache/tika/language/package-info.java | 22 - .../resources/org/apache/tika/language/be.ngp | 1014 ------- .../resources/org/apache/tika/language/ca.ngp | 1014 ------- .../resources/org/apache/tika/language/da.ngp | 1014 ------- .../resources/org/apache/tika/language/de.ngp | 1014 ------- .../resources/org/apache/tika/language/el.ngp | 1014 ------- .../resources/org/apache/tika/language/en.ngp | 1014 ------- .../resources/org/apache/tika/language/eo.ngp | 1014 ------- .../resources/org/apache/tika/language/es.ngp | 1014 ------- .../resources/org/apache/tika/language/et.ngp | 1014 ------- .../resources/org/apache/tika/language/fa.ngp | 1001 ------- 
.../resources/org/apache/tika/language/fi.ngp | 1014 ------- .../resources/org/apache/tika/language/fr.ngp | 1014 ------- .../resources/org/apache/tika/language/gl.ngp | 1014 ------- .../resources/org/apache/tika/language/hu.ngp | 1014 ------- .../resources/org/apache/tika/language/is.ngp | 1014 ------- .../resources/org/apache/tika/language/it.ngp | 1014 ------- .../resources/org/apache/tika/language/lt.ngp | 1209 -------- .../resources/org/apache/tika/language/nl.ngp | 1014 ------- .../resources/org/apache/tika/language/no.ngp | 1014 ------- .../resources/org/apache/tika/language/pl.ngp | 1014 ------- .../resources/org/apache/tika/language/pt.ngp | 1014 ------- .../resources/org/apache/tika/language/ro.ngp | 1014 ------- .../resources/org/apache/tika/language/ru.ngp | 1014 ------- .../resources/org/apache/tika/language/sk.ngp | 1014 ------- .../resources/org/apache/tika/language/sl.ngp | 1014 ------- .../resources/org/apache/tika/language/sv.ngp | 1014 ------- .../resources/org/apache/tika/language/th.ngp | 1014 ------- .../tika/language/tika.language.properties | 56 - .../resources/org/apache/tika/language/uk.ngp | 1014 ------- .../tika/language/LanguageIdentifierTest.java | 183 -- .../tika/language/LanguageProfileTest.java | 58 - .../language/LanguageProfilerBuilderTest.java | 100 - .../tika/language/ProfilingWriterTest.java | 44 - .../tika/language/detect/LanguageNamesTest.java | 38 + .../resources/org/apache/tika/language/da.test | 108 - .../resources/org/apache/tika/language/de.test | 104 - .../resources/org/apache/tika/language/el.test | 109 - .../resources/org/apache/tika/language/en.test | 105 - .../resources/org/apache/tika/language/es.test | 107 - .../resources/org/apache/tika/language/et.test | 17 - .../resources/org/apache/tika/language/fi.test | 106 - .../resources/org/apache/tika/language/fr.test | 105 - .../resources/org/apache/tika/language/it.test | 109 - .../tika/language/langbuilder/welsh_corpus.txt | 2602 ------------------ 
.../resources/org/apache/tika/language/lt.test | 32 - .../resources/org/apache/tika/language/nl.test | 105 - .../resources/org/apache/tika/language/pt.test | 105 - .../resources/org/apache/tika/language/sv.test | 108 - tika-example/pom.xml | 12 + .../java/org/apache/tika/example/Language.java | 32 +- .../tika/example/LanguageDetectingParser.java | 16 +- .../tika/example/LanguageDetectorExample.java | 33 + .../tika/example/LanguageIdentifierExample.java | 27 - .../org/apache/tika/example/MyFirstTika.java | 13 +- .../example/LanguageDetectorExampleTest.java | 39 + .../example/LanguageIdentifierExampleTest.java | 37 - tika-langdetect/pom.xml | 171 ++ .../tika/langdetect/OptimaizeLangDetector.java | 196 ++ .../tika/langdetect/TextLangDetector.java | 181 ++ ...apache.tika.language.detect.LanguageDetector | 1 + .../tika/langdetect/LanguageDetectorTest.java | 92 + .../langdetect/OptimaizeLangDetectorTest.java | 265 ++ .../tika/langdetect/TextLangDetectorTest.java | 63 + .../src/test/resources/log4j.properties | 24 + .../apache/tika/langdetect/language-codes.txt | 186 ++ .../tika/langdetect/language-tests/da.test | 108 + .../tika/langdetect/language-tests/de.test | 104 + .../tika/langdetect/language-tests/el.test | 109 + .../tika/langdetect/language-tests/en.test | 105 + .../tika/langdetect/language-tests/es.test | 107 + .../tika/langdetect/language-tests/et.test | 17 + .../tika/langdetect/language-tests/fi.test | 106 + .../tika/langdetect/language-tests/fr.test | 105 + .../tika/langdetect/language-tests/it.test | 109 + .../tika/langdetect/language-tests/ja.test | 78 + .../tika/langdetect/language-tests/lt.test | 32 + .../tika/langdetect/language-tests/nl.test | 105 + .../tika/langdetect/language-tests/pt.test | 105 + .../tika/langdetect/language-tests/sv.test | 108 + .../tika/langdetect/language-tests/th.test | 28 + .../tika/langdetect/language-tests/zh.test | 57 + .../org/apache/tika/langdetect/text-test.tsv | 18 + .../org/apache/tika/langdetect/udhr-known.txt | 11 + 
 .../org/apache/tika/langdetect/udhr-unknown.txt | 4 +
 tika-parent/pom.xml | 5 +-
 tika-server/pom.xml | 5 +
 .../tika/server/resource/LanguageResource.java | 27 +-
 .../tika/server/resource/MetadataResource.java | 9 +-
 .../resource/RecursiveMetadataResource.java | 7 +-
 .../tika/server/resource/TranslateResource.java | 22 +-
 tika-translate/pom.xml | 5 +
 .../language/translate/AbstractTranslator.java | 16 +
 .../language/translate/CachedTranslator.java | 20 +-
 .../language/translate/ExternalTranslator.java | 13 +-
 .../language/translate/GoogleTranslator.java | 20 +-
 .../language/translate/Lingo24Translator.java | 20 +-
 .../language/translate/MosesTranslator.java | 7 +-
 113 files changed, 3452 insertions(+), 34574 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/9cdfc4ae/tika-parent/pom.xml
----------------------------------------------------------------------
