Author: pottlinger
Date: Thu May 21 22:08:07 2015
New Revision: 1680958

URL: http://svn.apache.org/r1680958
Log:
RAT-201: In case of errors with the given charset, fall back to UTF-8.

* In case an invalid encoding is defined via system property, we fall back to
UTF-8.

Modified:
    creadur/rat/trunk/RELEASE_NOTES.txt
    
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
    
creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java

Modified: creadur/rat/trunk/RELEASE_NOTES.txt
URL: 
http://svn.apache.org/viewvc/creadur/rat/trunk/RELEASE_NOTES.txt?rev=1680958&r1=1680957&r2=1680958&view=diff
==============================================================================
--- creadur/rat/trunk/RELEASE_NOTES.txt (original)
+++ creadur/rat/trunk/RELEASE_NOTES.txt Thu May 21 22:08:07 2015
@@ -49,7 +49,8 @@ Rat 0.12 (SNAPSHOT)
     * [RAT-172] - Exclude technical directories of source code management 
systems and their ignore files from RAT scans. 
                   Enabled for SVN,Git,Mercurial,Bazar and CVS.
     * [RAT-200] - Update to latest ASF parent pom v17.
-    * [RAT-201] - BinaryGuesser should treat *.swf as binary
+    * [RAT-201] - BinaryGuesser should treat *.swf as binary. 
+                  Furthermore BinaryGuesser falls back to UTF-8 encoding in 
case the encoding given via system property (-Dfile.encoding) was not found.
 
 Rat 0.11
 ========

Modified: 
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
URL: 
http://svn.apache.org/viewvc/creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java?rev=1680958&r1=1680957&r2=1680958&view=diff
==============================================================================
--- 
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
 (original)
+++ 
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
 Thu May 21 22:08:07 2015
@@ -29,6 +29,7 @@ import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
+import java.nio.charset.UnsupportedCharsetException;
 import java.util.Locale;
 
 /**
@@ -38,6 +39,9 @@ public class BinaryGuesser {
 
     private static final String DOT = ".";
 
+    static final String FILE_ENCODING = "file.encoding";
+    private static Charset CHARSET_FROM_FILE_ENCODING_OR_UTF8 = 
getFileEncodingOrUTF8AsFallback();
+
     private static boolean isBinaryDocument(Document document) {
         boolean result = false;
         InputStream stream = null;
@@ -75,10 +79,8 @@ public class BinaryGuesser {
     /**
      * @param in the file to check.
      * @return Do the first few bytes of the stream hint at a binary file?
-     * <p/>
      * <p>Any IOException is swallowed internally and the test returns
      * false.</p>
-     * <p/>
      * <p>This method may lead to false negatives if the reader throws
      * an exception because it can't read characters according to the
      * reader's encoding from the underlying stream.</p>
@@ -99,10 +101,8 @@ public class BinaryGuesser {
     /**
      * @param in the file to check.
      * @return Do the first few bytes of the stream hint at a binary file?
-     * <p/>
      * <p>Any IOException is swallowed internally and the test returns
      * false.</p>
-     * <p/>
      * <p>This method will try to read bytes from the stream and
      * translate them to characters according to the platform's
      * default encoding.  If any bytes can not be translated to
@@ -116,8 +116,7 @@ public class BinaryGuesser {
             if (bytesRead > 0) {
                 ByteBuffer bytes = ByteBuffer.wrap(taste, 0, bytesRead);
                 CharBuffer chars = CharBuffer.allocate(2 * bytesRead);
-                Charset cs = 
Charset.forName(System.getProperty("file.encoding"));
-                CharsetDecoder cd = cs.newDecoder()
+                CharsetDecoder cd = 
CHARSET_FROM_FILE_ENCODING_OR_UTF8.newDecoder()
                         .onMalformedInput(CodingErrorAction.REPORT)
                         .onUnmappableCharacter(CodingErrorAction.REPORT);
                 while (bytes.remaining() > 0) {
@@ -143,6 +142,13 @@ public class BinaryGuesser {
         return false;
     }
 
+    static Charset getFileEncodingOrUTF8AsFallback() {
+        try {
+            return Charset.forName(System.getProperty(FILE_ENCODING));
+        } catch (UnsupportedCharsetException e) {
+            return Charset.forName("UTF-8");
+        }
+    }
 
     /**
      * @param name current file name.

Modified: 
creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
URL: 
http://svn.apache.org/viewvc/creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java?rev=1680958&r1=1680957&r2=1680958&view=diff
==============================================================================
--- 
creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
 (original)
+++ 
creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
 Thu May 21 22:08:07 2015
@@ -28,6 +28,7 @@ import java.io.Reader;
 import java.util.Arrays;
 import java.util.List;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
@@ -131,4 +132,14 @@ public class BinaryGuesserTest {
     public void emptyFile() throws IOException {
         assertFalse(BinaryGuesser.isBinary(new 
FileDocument(Resources.getResourceFile("/elements/sub/Empty.txt"))));
     }
+
+    @Test
+    public void testFileEncodingSettable() {
+        System.setProperty(BinaryGuesser.FILE_ENCODING, 
"shouldThrowAnExceptionBecauseNotFound");
+        assertEquals("UTF-8", 
BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
+
+        final String usAscii = "US-ASCII";
+        System.setProperty(BinaryGuesser.FILE_ENCODING, usAscii);
+        assertEquals(usAscii, 
BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
+    }
 }


Reply via email to