Claudenw commented on code in PR #395: URL: https://github.com/apache/creadur-rat/pull/395#discussion_r1845347757
##########
apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java:
##########
@@ -133,6 +148,31 @@ public static String process(final Document document) throws RatDocumentAnalysis
         }
     }
 
+    /**
+     * Determine the character set for the input stream. Input stream must implement mark.
+     * @param stream the stream to check.
+     * @return the detected characterset or null if not detectable.
+     * @throws IOException on IO error.
+     */
+    private static Charset detectCharset(final InputStream stream) throws IOException {
+        CharsetDetector encodingDetector = new CharsetDetector();
+        encodingDetector.setText(stream);
+        CharsetMatch charsetMatch = encodingDetector.detect();
+        if (charsetMatch != null) {
+            try {
+                return Charset.forName(charsetMatch.getName());
+            } catch (UnsupportedCharsetException e) {
+                // do nothing

Review Comment:
Good catch.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@creadur.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org