Author: pottlinger
Date: Thu May 21 22:08:07 2015
New Revision: 1680958
URL: http://svn.apache.org/r1680958
Log:
RAT-201: In case of errors with the given charset, fall back to UTF-8.
* In case an invalid encoding is defined via the file.encoding system property, we fall back to
UTF-8.
Modified:
creadur/rat/trunk/RELEASE_NOTES.txt
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
Modified: creadur/rat/trunk/RELEASE_NOTES.txt
URL:
http://svn.apache.org/viewvc/creadur/rat/trunk/RELEASE_NOTES.txt?rev=1680958&r1=1680957&r2=1680958&view=diff
==============================================================================
--- creadur/rat/trunk/RELEASE_NOTES.txt (original)
+++ creadur/rat/trunk/RELEASE_NOTES.txt Thu May 21 22:08:07 2015
@@ -49,7 +49,8 @@ Rat 0.12 (SNAPSHOT)
* [RAT-172] - Exclude technical directories of source code management
systems and their ignore files from RAT scans.
Enabled for SVN,Git,Mercurial,Bazar and CVS.
* [RAT-200] - Update to latest ASF parent pom v17.
- * [RAT-201] - BinaryGuesser should treat *.swf as binary
+ * [RAT-201] - BinaryGuesser should treat *.swf as binary.
+ Furthermore BinaryGuesser falls back to UTF-8 encoding in
case the encoding given via system property (-Dfile.encoding) was not found.
Rat 0.11
========
Modified:
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
URL:
http://svn.apache.org/viewvc/creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java?rev=1680958&r1=1680957&r2=1680958&view=diff
==============================================================================
---
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
(original)
+++
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
Thu May 21 22:08:07 2015
@@ -29,6 +29,7 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
+import java.nio.charset.UnsupportedCharsetException;
import java.util.Locale;
/**
@@ -38,6 +39,9 @@ public class BinaryGuesser {
private static final String DOT = ".";
+ static final String FILE_ENCODING = "file.encoding";
+ private static Charset CHARSET_FROM_FILE_ENCODING_OR_UTF8 =
getFileEncodingOrUTF8AsFallback();
+
private static boolean isBinaryDocument(Document document) {
boolean result = false;
InputStream stream = null;
@@ -75,10 +79,8 @@ public class BinaryGuesser {
/**
* @param in the file to check.
* @return Do the first few bytes of the stream hint at a binary file?
- * <p/>
* <p>Any IOException is swallowed internally and the test returns
* false.</p>
- * <p/>
* <p>This method may lead to false negatives if the reader throws
* an exception because it can't read characters according to the
* reader's encoding from the underlying stream.</p>
@@ -99,10 +101,8 @@ public class BinaryGuesser {
/**
* @param in the file to check.
* @return Do the first few bytes of the stream hint at a binary file?
- * <p/>
* <p>Any IOException is swallowed internally and the test returns
* false.</p>
- * <p/>
* <p>This method will try to read bytes from the stream and
* translate them to characters according to the platform's
* default encoding. If any bytes can not be translated to
@@ -116,8 +116,7 @@ public class BinaryGuesser {
if (bytesRead > 0) {
ByteBuffer bytes = ByteBuffer.wrap(taste, 0, bytesRead);
CharBuffer chars = CharBuffer.allocate(2 * bytesRead);
- Charset cs =
Charset.forName(System.getProperty("file.encoding"));
- CharsetDecoder cd = cs.newDecoder()
+ CharsetDecoder cd =
CHARSET_FROM_FILE_ENCODING_OR_UTF8.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
while (bytes.remaining() > 0) {
@@ -143,6 +142,13 @@ public class BinaryGuesser {
return false;
}
+ static Charset getFileEncodingOrUTF8AsFallback() {
+ try {
+ return Charset.forName(System.getProperty(FILE_ENCODING));
+ } catch (UnsupportedCharsetException e) {
+ return Charset.forName("UTF-8");
+ }
+ }
/**
* @param name current file name.
Modified:
creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
URL:
http://svn.apache.org/viewvc/creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java?rev=1680958&r1=1680957&r2=1680958&view=diff
==============================================================================
---
creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
(original)
+++
creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
Thu May 21 22:08:07 2015
@@ -28,6 +28,7 @@ import java.io.Reader;
import java.util.Arrays;
import java.util.List;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -131,4 +132,14 @@ public class BinaryGuesserTest {
public void emptyFile() throws IOException {
assertFalse(BinaryGuesser.isBinary(new
FileDocument(Resources.getResourceFile("/elements/sub/Empty.txt"))));
}
+
+ @Test
+ public void testFileEncodingSettable() {
+ System.setProperty(BinaryGuesser.FILE_ENCODING,
"shouldThrowAnExceptionBecauseNotFound");
+ assertEquals("UTF-8",
BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
+
+ final String usAscii = "US-ASCII";
+ System.setProperty(BinaryGuesser.FILE_ENCODING, usAscii);
+ assertEquals(usAscii,
BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
+ }
}