[ 
https://issues.apache.org/jira/browse/TIKA-4671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18059518#comment-18059518
 ] 

Hudson commented on TIKA-4671:
------------------------------

SUCCESS: Integrated in Jenkins build Tika » tika-main-jdk17 #1216 (See 
[https://ci-builds.apache.org/job/Tika/job/tika-main-jdk17/1216/])
TIKA-4671 - language aware charset detection (tallison: 
[https://github.com/apache/tika/commit/52f7d4c82e0ce0a3747a5db181fd4e55641f968b])
* (edit) 
tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java
* (edit) docs/modules/ROOT/pages/advanced/index.adoc
* (add) 
tika-core/src/main/java/org/apache/tika/detect/EncodingDetectorContext.java
* (edit) 
tika-core/src/main/java/org/apache/tika/detect/CompositeEncodingDetector.java
* (edit) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaEncodingDetectorTest.java
* (edit) docs/modules/ROOT/nav.adoc
* (edit) 
tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
* (edit) 
tika-core/src/main/java/org/apache/tika/detect/DefaultEncodingDetector.java
* (edit) 
tika-langdetect/tika-langdetect-charsoup/src/main/java/org/apache/tika/langdetect/charsoup/CharSoupLanguageDetector.java
* (edit) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/pom.xml
* (add) docs/modules/ROOT/pages/advanced/tika-eval.adoc
TIKA-4671 - git add (tallison: 
[https://github.com/apache/tika/commit/1bf02554296fa38f236870dc9feef7f687e23abb])
* (add) 
tika-langdetect/tika-langdetect-charsoup/src/test/java/org/apache/tika/langdetect/charsoup/CharSoupEncodingDetectorTest.java
* (add) 
tika-langdetect/tika-langdetect-charsoup/src/main/java/org/apache/tika/langdetect/charsoup/CharSoupEncodingDetector.java
* (add) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testArabicMisleadingCharset.html
* (add) tika-core/src/main/java/org/apache/tika/detect/MetaEncodingDetector.java
Revert "TIKA-4671 - git add" (tallison: 
[https://github.com/apache/tika/commit/1a236d2a4e071f4525eaa50e10fe1c6fc5a66e63])
* (delete) 
tika-langdetect/tika-langdetect-charsoup/src/test/java/org/apache/tika/langdetect/charsoup/CharSoupEncodingDetectorTest.java
* (delete) 
tika-langdetect/tika-langdetect-charsoup/src/main/java/org/apache/tika/langdetect/charsoup/CharSoupEncodingDetector.java
* (delete) 
tika-core/src/main/java/org/apache/tika/detect/MetaEncodingDetector.java
* (delete) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testArabicMisleadingCharset.html
Revert "TIKA-4671 - language aware charset detection" (tallison: 
[https://github.com/apache/tika/commit/c4d67657a0b6eaaaf3ddb33448ebc0321e2df87d])
* (delete) docs/modules/ROOT/pages/advanced/tika-eval.adoc
* (edit) docs/modules/ROOT/nav.adoc
* (edit) 
tika-core/src/main/java/org/apache/tika/detect/CompositeEncodingDetector.java
* (delete) 
tika-core/src/main/java/org/apache/tika/detect/EncodingDetectorContext.java
* (edit) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaEncodingDetectorTest.java
* (edit) 
tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java
* (edit) 
tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
* (edit) 
tika-core/src/main/java/org/apache/tika/detect/DefaultEncodingDetector.java
* (edit) 
tika-langdetect/tika-langdetect-charsoup/src/main/java/org/apache/tika/langdetect/charsoup/CharSoupLanguageDetector.java
* (edit) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/pom.xml
* (edit) docs/modules/ROOT/pages/advanced/index.adoc


> Use langid to adjudicate charset detector disagreements
> -------------------------------------------------------
>
>                 Key: TIKA-4671
>                 URL: https://issues.apache.org/jira/browse/TIKA-4671
>             Project: Tika
>          Issue Type: New Feature
>            Reporter: Tim Allison
>            Priority: Major
>




--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to