Repository: nutch
Updated Branches:
  refs/heads/2.x 75d846cf3 -> 32d1486df


http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
index 0db617c..d7acfdc 100644
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
+++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
@@ -35,6 +35,7 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.net.URL;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -114,7 +115,7 @@ public class FtpResponse {
       if (addr != null && conf.getBoolean("store.ip.address", false) == true) {
         String ipString = addr.getHostAddress(); // get the ip address
         page.getMetadata().put(new Utf8("_ip_"),
-            ByteBuffer.wrap(ipString.getBytes()));
+            ByteBuffer.wrap(ipString.getBytes(StandardCharsets.UTF_8)));
       }
 
       // idled too long, remote server or ourselves may have timed out,
@@ -521,7 +522,7 @@ public class FtpResponse {
 
     x.append("</pre></body></html>\n");
 
-    return new String(x).getBytes();
+    return new String(x).getBytes(StandardCharsets.UTF_8);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
index cc039bd..4ce9a83 100644
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
+++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
@@ -29,6 +29,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.net.URL;
+import java.util.Locale;
 
 /**
  * This class is used for parsing robots for urls belonging to FTP protocol. It
@@ -63,9 +64,9 @@ public class FtpRobotRulesParser extends RobotRulesParser {
    */
   public BaseRobotRules getRobotRulesSet(Protocol ftp, URL url) {
 
-    String protocol = url.getProtocol().toLowerCase(); // normalize to lower
+    String protocol = url.getProtocol().toLowerCase(Locale.ROOT); // normalize to lower
                                                        // case
-    String host = url.getHost().toLowerCase(); // normalize to lower case
+    String host = url.getHost().toLowerCase(Locale.ROOT); // normalize to lower case
 
     BaseRobotRules robotRules = (SimpleRobotRules) CACHE.get(protocol + ":"
         + host);

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
index 773958a..965eaac 100644
--- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
+++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
@@ -36,6 +36,7 @@ import javax.net.ssl.SSLSocket;
 import javax.net.ssl.SSLSocketFactory;
 import java.net.URL;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
@@ -138,7 +139,7 @@ public class HttpResponse implements Response {
         String ipString = sockAddr.getAddress().getHostAddress(); // get the ip
                                                                   // address
         page.getMetadata().put(new Utf8("_ip_"),
-            ByteBuffer.wrap(ipString.getBytes()));
+            ByteBuffer.wrap(ipString.getBytes(StandardCharsets.UTF_8)));
       }
 
       // make request
@@ -182,7 +183,7 @@ public class HttpResponse implements Response {
       // }
       reqStr.append("\r\n");
 
-      byte[] reqBytes = reqStr.toString().getBytes();
+      byte[] reqBytes = reqStr.toString().getBytes(StandardCharsets.UTF_8);
 
       req.write(reqBytes);
       req.flush();

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
index 094abba..a15f91b 100644
--- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
+++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import java.util.TreeMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.nio.charset.StandardCharsets;
 
 // Commons Codec imports
 import org.apache.commons.codec.binary.Base64;
@@ -92,9 +93,9 @@ public class HttpBasicAuthentication implements HttpAuthentication,
           + " is null");
     }
 
-    byte[] credBytes = (username + ":" + password).getBytes();
+    byte[] credBytes = (username + ":" + password).getBytes(StandardCharsets.UTF_8);
     credentials.add("Authorization: Basic "
-        + new String(Base64.encodeBase64(credBytes)));
+        + new String(Base64.encodeBase64(credBytes), StandardCharsets.UTF_8));
     if (LOG.isTraceEnabled()) {
       LOG.trace("Basic credentials: " + credentials);
     }

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java b/src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java
index 1545aa2..2dbdd48 100644
--- a/src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java
+++ b/src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java
@@ -20,6 +20,7 @@ package org.apache.nutch.protocol.sftp;
 //JDK imports
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.Collection;
@@ -222,7 +223,7 @@ public class Sftp implements Protocol {
       metadata.set(Response.LOCATION, url.toExternalForm());
 
       Content content = new Content(url.toExternalForm(), url.toExternalForm(),
-          directoryList.getBytes(), "text/html", metadata, configuration);
+          directoryList.getBytes(StandardCharsets.UTF_8), "text/html", metadata, configuration);
       ProtocolOutput po = new ProtocolOutput(content);
       return po;
     } catch (SftpException e) {

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java
----------------------------------------------------------------------
diff --git a/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java b/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java
index 587a218..00e6a6a 100644
--- a/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java
+++ b/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java
@@ -24,6 +24,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Locale;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.scoring.ScoreDatum;
@@ -32,6 +33,7 @@ import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.TableUtil;
 
 import java.text.DecimalFormat;
+import java.text.NumberFormat;
 
 import org.junit.Before;
 import org.junit.Test;
@@ -55,7 +57,11 @@ public class TestOPICScoringFilter {
   private final List<ScoreDatum> outlinkedScoreData = new ArrayList<ScoreDatum>();
   private static final int DEPTH = 3;
 
-  DecimalFormat df = new DecimalFormat("#.###");
+  DecimalFormat df = (DecimalFormat) NumberFormat.getNumberInstance(Locale.ROOT);
+
+  {
+    df.applyPattern("#.###");
+  }
 
   private final String[] seedList = new String[] { "http://a.com",
       "http://b.com", "http://c.com", };

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
----------------------------------------------------------------------
diff --git a/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java b/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
index 5162a9b..8b81023 100644
--- a/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
+++ b/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
@@ -19,6 +19,7 @@ package org.apache.nutch.collection;
 import java.io.ByteArrayInputStream;
 import java.io.InputStream;
 import java.util.Collection;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.nutch.util.NutchConfiguration;
 
@@ -69,7 +70,7 @@ public class TestSubcollection {
     xml.append("</subcollection>");
     xml.append("</subcollections>");
 
-    InputStream is = new ByteArrayInputStream(xml.toString().getBytes());
+    InputStream is = new ByteArrayInputStream(xml.toString().getBytes(StandardCharsets.UTF_8));
 
     CollectionManager cm = new CollectionManager();
     cm.parse(is);

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
----------------------------------------------------------------------
diff --git a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
index 6d94391..bf1ef42 100644
--- a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
+++ b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
@@ -21,8 +21,12 @@ import java.io.FileReader;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
 import java.util.LinkedHashSet;
 import java.util.Set;
+import java.util.Locale;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.commons.lang.StringUtils;
 import org.slf4j.Logger;
@@ -167,7 +171,7 @@ public class DomainURLFilter implements URLFilter {
     }
     try {
       if (reader == null) {
-        reader = new FileReader(file);
+        reader = new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8);
       }
       readConfiguration(reader);
     } catch (IOException e) {
@@ -185,7 +189,7 @@ public class DomainURLFilter implements URLFilter {
 
       // match for suffix, domain, and host in that order. more general will
       // override more specific
-      String domain = URLUtil.getDomainName(url).toLowerCase().trim();
+      String domain = URLUtil.getDomainName(url).toLowerCase(Locale.ROOT).trim();
       String host = URLUtil.getHost(url);
       String suffix = null;
       DomainSuffix domainSuffix = URLUtil.getDomainSuffix(url);

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
----------------------------------------------------------------------
diff --git a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
index 0bc3ca1..366c11e 100644
--- a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
+++ b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
@@ -36,6 +36,7 @@ import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.io.IOException;
 import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
 
 import java.util.List;
 import java.util.ArrayList;
@@ -109,7 +110,7 @@ public class PrefixURLFilter implements URLFilter {
     else
       filter = new PrefixURLFilter();
 
-    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
     String line;
     while ((line = in.readLine()) != null) {
       String out = filter.filter(line);

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
----------------------------------------------------------------------
diff --git a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
index 6c14e63..1a7492a 100644
--- a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
+++ b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
@@ -33,11 +33,14 @@ import java.io.Reader;
 import java.io.FileReader;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
 
 import java.util.List;
 import java.util.ArrayList;
+import java.util.Locale;
 
 import java.net.URL;
 import java.net.MalformedURLException;
@@ -151,7 +154,7 @@ public class SuffixURLFilter implements URLFilter {
       return null;
     String _url;
     if (ignoreCase)
-      _url = url.toLowerCase();
+      _url = url.toLowerCase(Locale.ROOT);
     else
       _url = url;
     if (filterFromPath) {
@@ -225,7 +228,7 @@ public class SuffixURLFilter implements URLFilter {
     }
     if (ignore) {
       for (int i = 0; i < aSuffixes.size(); i++) {
-        aSuffixes.set(i, ((String) aSuffixes.get(i)).toLowerCase());
+        aSuffixes.set(i, ((String) aSuffixes.get(i)).toLowerCase(Locale.ROOT));
       }
     }
     suffixes = new SuffixStringMatcher(aSuffixes);
@@ -236,14 +239,14 @@ public class SuffixURLFilter implements URLFilter {
   public static void main(String args[]) throws IOException {
 
     SuffixURLFilter filter;
-    if (args.length >= 1)
-      filter = new SuffixURLFilter(new FileReader(args[0]));
-    else {
+    if (args.length >= 1) {
+      filter = new SuffixURLFilter(new InputStreamReader(new FileInputStream(args[0]), StandardCharsets.UTF_8));
+    } else {
       filter = new SuffixURLFilter();
       filter.setConf(NutchConfiguration.create());
     }
 
-    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
     String line;
     while ((line = in.readLine()) != null) {
       String out = filter.filter(line);

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
----------------------------------------------------------------------
diff --git a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
index 0fae8da..3652d47 100644
--- a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
+++ b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
@@ -20,16 +20,12 @@ package org.apache.nutch.net.urlnormalizer.basic;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
 import java.net.URISyntaxException;
 import java.net.URL;
 import java.net.MalformedURLException;
 import java.util.regex.Pattern;
-
-
-
-
-
-
+import java.util.Locale;
 
 // Commons Logging imports
 import org.slf4j.Logger;
@@ -83,7 +79,7 @@ public class BasicURLNormalizer extends Configured implements URLNormalizer {
         || "ftp".equals(protocol)) {
 
       if (host != null) {
-        String newHost = host.toLowerCase(); // lowercase host
+        String newHost = host.toLowerCase(Locale.ROOT); // lowercase host
         if (!host.equals(newHost)) {
           host = newHost;
           changed = true;
@@ -161,7 +157,7 @@ public class BasicURLNormalizer extends Configured implements URLNormalizer {
       System.out.println("Scope: " + scope);
     }
     String line, normUrl;
-    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
     while ((line = in.readLine()) != null) {
       try {
         normUrl = normalizer.normalize(line, scope);

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java
----------------------------------------------------------------------
diff --git a/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java b/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java
index 363da18..d460d9e 100644
--- a/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java
+++ b/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java
@@ -21,6 +21,9 @@ import java.io.FileReader;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+import java.nio.charset.StandardCharsets;
 import java.net.MalformedURLException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -205,8 +208,8 @@ public class RegexURLNormalizer extends Configured implements URLNormalizer {
       LOG.info("loading " + filename);
     }
     try {
-      FileReader reader = new FileReader(filename);
-      return readConfiguration(reader);
+      InputStreamReader isr = new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8);
+      return readConfiguration(isr);
     } catch (Exception e) {
       LOG.error("Error loading rules from '" + filename + "': " + e);
       return EMPTY_RULES;

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
----------------------------------------------------------------------
diff --git a/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java b/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
index 92fd630..128ecdc 100644
--- a/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
+++ b/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
@@ -24,6 +24,7 @@ import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -68,10 +69,10 @@ public class TestRegexURLNormalizer {
     });
     for (int i = 0; i < configs.length; i++) {
       try {
-        FileReader reader = new FileReader(configs[i]);
+        InputStreamReader isr = new InputStreamReader(new FileInputStream(configs[i]), StandardCharsets.UTF_8);;
         String cname = configs[i].getName();
         cname = cname.substring(16, cname.indexOf(".xml"));
-        normalizer.setConfiguration(reader, cname);
+        normalizer.setConfiguration(isr, cname);
         NormalizedURL[] urls = readTestFile(cname);
         testData.put(cname, urls);
       } catch (Exception e) {

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/test/org/apache/nutch/parse/TestSitemapParser.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/nutch/parse/TestSitemapParser.java b/src/test/org/apache/nutch/parse/TestSitemapParser.java
index 5c9ea83..7521775 100644
--- a/src/test/org/apache/nutch/parse/TestSitemapParser.java
+++ b/src/test/org/apache/nutch/parse/TestSitemapParser.java
@@ -17,31 +17,17 @@
 
 package org.apache.nutch.parse;
 
-import org.apache.avro.util.Utf8;
-import org.apache.commons.io.IOUtils;
-import org.apache.nutch.crawl.InjectType;
-import org.apache.nutch.metadata.Metadata;
-import org.apache.nutch.net.URLFilterException;
-import org.apache.nutch.net.URLNormalizers;
-import org.apache.nutch.storage.Mark;
 import org.apache.nutch.storage.ParseStatus;
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.AbstractNutchTest;
-import org.apache.nutch.util.TableUtil;
 import org.junit.After;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.mockito.Mock;
-import org.mockito.stubbing.Answer;
 
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
 
 import static org.junit.Assert.*;
 import static org.mockito.Mockito.mock;
@@ -77,7 +63,7 @@ public class TestSitemapParser extends AbstractNutchTest {
         + "<url>\n\t\t<loc>http://localhost/zzz4.html</loc>\n\t\t<lastmod>2015-06-10</lastmod>\n\t\t<changefreq>monthly</changefreq>\n\t\t<priority>0.8</priority>\n\t</url>\n\t"
         + "<url>\n\t\t<loc>http://localhost/zzz5.html</loc>\n\t\t<lastmod>2015-06-10</lastmod>\n\t\t<changefreq>monthly</changefreq>\n\t\t<priority>0.8</priority>\n\t</url>\n"
         + "</urlset>";
-    when(page.getContent()).thenReturn(ByteBuffer.wrap(content.getBytes()));
+    when(page.getContent()).thenReturn(ByteBuffer.wrap(content.getBytes(StandardCharsets.UTF_8)));
     when(page.getContentType()).thenReturn("application/xml");
 
     NutchSitemapParser sParser = new NutchSitemapParser();
@@ -110,7 +96,7 @@ public class TestSitemapParser extends AbstractNutchTest {
         + "    </sitemap>\n"
         + "</sitemapindex>";
 
-    when(page.getContent()).thenReturn(ByteBuffer.wrap(content.getBytes()));
+    when(page.getContent()).thenReturn(ByteBuffer.wrap(content.getBytes(StandardCharsets.UTF_8)));
     when(page.getContentType()).thenReturn("application/xml");
     when(page.getSitemaps()).thenReturn(new HashMap<CharSequence, CharSequence>());
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/test/org/apache/nutch/plugin/TestPluginSystem.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/nutch/plugin/TestPluginSystem.java b/src/test/org/apache/nutch/plugin/TestPluginSystem.java
index 3ff20ad..96115f5 100644
--- a/src/test/org/apache/nutch/plugin/TestPluginSystem.java
+++ b/src/test/org/apache/nutch/plugin/TestPluginSystem.java
@@ -20,8 +20,9 @@ package org.apache.nutch.plugin;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
 import java.util.LinkedList;
 import java.util.Locale;
 import java.util.Properties;
@@ -259,7 +260,8 @@ public class TestPluginSystem {
    */
   private void createPluginManifest(int i, String pFolderPath)
       throws IOException {
-    FileWriter out = new FileWriter(pFolderPath + File.separator + "plugin.xml");
+    OutputStreamWriter osw = new OutputStreamWriter(
+        new FileOutputStream(pFolderPath + File.separator + "plugin.xml"), StandardCharsets.UTF_8);
     String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
         + "<!--this is just a simple plugin for testing issues.-->"
         + "<plugin id=\"org.apache.nutch.plugin."
@@ -278,9 +280,9 @@ public class TestPluginSystem {
         + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
         + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
         + "</implementation></extension></plugin>";
-    out.write(xml);
-    out.flush();
-    out.close();
+    osw.write(xml);
+    osw.flush();
+    osw.close();
   }
 
   private String getParameterValue() {

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/test/org/apache/nutch/util/TestEncodingDetector.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/nutch/util/TestEncodingDetector.java b/src/test/org/apache/nutch/util/TestEncodingDetector.java
index 776573c..3f768fd 100644
--- a/src/test/org/apache/nutch/util/TestEncodingDetector.java
+++ b/src/test/org/apache/nutch/util/TestEncodingDetector.java
@@ -24,6 +24,8 @@ import org.junit.Test;
 
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Locale;
 
 import static org.junit.Assert.assertEquals;
 
@@ -59,7 +61,7 @@ public class TestEncodingDetector {
     detector.autoDetectClues(page, true);
     encoding = detector.guessEncoding(page, "windows-1252");
     // no information is available, so it should return default encoding
-    assertEquals("windows-1252", encoding.toLowerCase());
+    assertEquals("windows-1252", encoding.toLowerCase(Locale.ROOT));
 
     page = WebPage.newBuilder().build();
     page.setBaseUrl(new Utf8("http://www.example.com/"));
@@ -71,7 +73,7 @@ public class TestEncodingDetector {
     detector = new EncodingDetector(conf);
     detector.autoDetectClues(page, true);
     encoding = detector.guessEncoding(page, "windows-1252");
-    assertEquals("utf-16", encoding.toLowerCase());
+    assertEquals("utf-16", encoding.toLowerCase(Locale.ROOT));
 
     page = WebPage.newBuilder().build();
     page.setBaseUrl(new Utf8("http://www.example.com/"));
@@ -82,7 +84,7 @@ public class TestEncodingDetector {
     detector.autoDetectClues(page, true);
     detector.addClue("windows-1254", "sniffed");
     encoding = detector.guessEncoding(page, "windows-1252");
-    assertEquals("windows-1254", encoding.toLowerCase());
+    assertEquals("windows-1254", encoding.toLowerCase(Locale.ROOT));
 
     // enable autodetection
     conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, 50);
@@ -91,13 +93,13 @@ public class TestEncodingDetector {
     page.setContentType(new Utf8("text/plain"));
     page.setContent(ByteBuffer.wrap(contentInOctets));
     page.getMetadata().put(new Utf8(Response.CONTENT_TYPE),
-        ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes()));
+        ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes(StandardCharsets.UTF_8)));
 
     detector = new EncodingDetector(conf);
     detector.autoDetectClues(page, true);
     detector.addClue("utf-32", "sniffed");
     encoding = detector.guessEncoding(page, "windows-1252");
-    assertEquals("utf-8", encoding.toLowerCase());
+    assertEquals("utf-8", encoding.toLowerCase(Locale.ROOT));
   }
 
 }

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/test/org/apache/nutch/util/TestGZIPUtils.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/nutch/util/TestGZIPUtils.java b/src/test/org/apache/nutch/util/TestGZIPUtils.java
index 50fda7e..878442e 100644
--- a/src/test/org/apache/nutch/util/TestGZIPUtils.java
+++ b/src/test/org/apache/nutch/util/TestGZIPUtils.java
@@ -21,6 +21,7 @@ import org.junit.Test;
 import static org.junit.Assert.*;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 
 /** Unit tests for GZIPUtils methods. */
 public class TestGZIPUtils {
@@ -115,41 +116,41 @@ public class TestGZIPUtils {
 
   @Test
   public void testZipUnzip() {
-    byte[] testBytes = SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes(StandardCharsets.UTF_8);
     testZipUnzip(testBytes);
-    testBytes = LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes(StandardCharsets.UTF_8);
     testZipUnzip(testBytes);
-    testBytes = WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes(StandardCharsets.UTF_8);
     testZipUnzip(testBytes);
   }
 
   @Test
   public void testZipUnzipBestEffort() {
-    byte[] testBytes = SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes(StandardCharsets.UTF_8);
     testZipUnzipBestEffort(testBytes);
-    testBytes = LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes(StandardCharsets.UTF_8);
     testZipUnzipBestEffort(testBytes);
-    testBytes = WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes(StandardCharsets.UTF_8);
     testZipUnzipBestEffort(testBytes);
   }
 
   @Test
   public void testTruncation() {
-    byte[] testBytes = SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes(StandardCharsets.UTF_8);
     testTruncation(testBytes);
-    testBytes = LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes(StandardCharsets.UTF_8);
     testTruncation(testBytes);
-    testBytes = WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes(StandardCharsets.UTF_8);
     testTruncation(testBytes);
   }
 
   @Test
   public void testLimit() {
-    byte[] testBytes = SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes(StandardCharsets.UTF_8);
     testLimit(testBytes);
-    testBytes = LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes(StandardCharsets.UTF_8);
     testLimit(testBytes);
-    testBytes = WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes(StandardCharsets.UTF_8);
     testLimit(testBytes);
   }
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/test/org/apache/nutch/util/TestNodeWalker.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/nutch/util/TestNodeWalker.java b/src/test/org/apache/nutch/util/TestNodeWalker.java
index db6db91..ec3669e 100644
--- a/src/test/org/apache/nutch/util/TestNodeWalker.java
+++ b/src/test/org/apache/nutch/util/TestNodeWalker.java
@@ -18,6 +18,7 @@
 package org.apache.nutch.util;
 
 import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
 
 import org.junit.Before;
 import org.junit.Test;
@@ -60,7 +61,7 @@ public class TestNodeWalker {
           "http://apache.org/xml/features/nonvalidating/load-external-dtd",
           false);
       parser
-          .parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
+          .parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes(StandardCharsets.UTF_8))));
     } catch (Exception e) {
       e.printStackTrace();
     }

Reply via email to