Author: tallison
Date: Fri Jan 23 19:55:51 2015
New Revision: 1654351

URL: http://svn.apache.org/r1654351
Log:
TIKA-1529: turn forbidden-apis back on and clean up all mentions of UTF-8

Modified:
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
    tika/trunk/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
    tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
    tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfilerBuilderTest.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
    tika/trunk/tika-parent/pom.xml
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmBlockInfo.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/CSVMessageBodyWriter.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataListMessageBodyWriter.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TextMessageBodyWriter.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
    tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
    tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java 
(original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Fri Jan 
23 19:55:51 2015
@@ -727,7 +727,7 @@ public class TikaCLI {
         } else if (System.getProperty("os.name")
                 .toLowerCase(Locale.ROOT).startsWith("mac os x")) {
             // TIKA-324: Override the default encoding on Mac OS X
-            return new OutputStreamWriter(output, "UTF-8");
+            return new OutputStreamWriter(output, IOUtils.UTF_8);
         } else {
             return new OutputStreamWriter(output, Charset.defaultCharset());
         }

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java 
(original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java Fri Jan 
23 19:55:51 2015
@@ -459,7 +459,7 @@ public class TikaGUI extends JFrame
                 InputStream stream = url.openStream();
                 try {
                     StringWriter writer = new StringWriter();
-                    IOUtils.copy(stream, writer, "UTF-8");
+                    IOUtils.copy(stream, writer, IOUtils.UTF_8.name());
 
                     JEditorPane editor =
                         new JEditorPane("text/plain", writer.toString());

Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
(original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Fri 
Jan 23 19:55:51 2015
@@ -16,20 +16,20 @@
  */
 package org.apache.tika.cli;
 
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.PrintStream;
 import java.net.URI;
-
 import org.apache.commons.io.FileUtils;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
 /**
  * Tests the Tika's cli
  */
@@ -49,7 +49,7 @@ public class TikaCLITest {
         outContent = new ByteArrayOutputStream();
         resourcePrefix = testDataURI.toString();
         stdout = System.out;
-        System.setOut(new PrintStream(outContent, true, "UTF-8"));
+        System.setOut(new PrintStream(outContent, true, IOUtils.UTF_8.name()));
     }
 
     /**
@@ -73,7 +73,7 @@ public class TikaCLITest {
     public void testListParserDetail() throws Exception{
         String[] params = {"--list-parser-detail"};
         TikaCLI.main(params);
-        
assertTrue(outContent.toString("UTF-8").contains("application/vnd.oasis.opendocument.text-web"));
+        
assertTrue(outContent.toString(IOUtils.UTF_8.name()).contains("application/vnd.oasis.opendocument.text-web"));
     }
 
     /**
@@ -98,7 +98,7 @@ public class TikaCLITest {
     public void testXMLOutput() throws Exception{
         String[] params = {"-x", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString("UTF-8").contains("?xml version=\"1.0\" 
encoding=\"UTF-8\"?"));
+        assertTrue(outContent.toString(IOUtils.UTF_8.name()).contains("?xml 
version=\"1.0\" encoding=\"UTF-8\"?"));
     }
 
     /**
@@ -112,7 +112,7 @@ public class TikaCLITest {
         TikaCLI.main(params);
         assertTrue(outContent.toString("UTF-8").contains("html 
xmlns=\"http://www.w3.org/1999/xhtml"));
         assertTrue("Expanded <title></title> element should be present",
-                outContent.toString("UTF-8").contains("<title></title>"));
+                
outContent.toString(IOUtils.UTF_8.name()).contains("<title></title>"));
     }
 
     /**
@@ -124,7 +124,7 @@ public class TikaCLITest {
     public void testTextOutput() throws Exception{
         String[] params = {"-t", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString("UTF-8").contains("finished off the 
cake"));
+        
assertTrue(outContent.toString(IOUtils.UTF_8.name()).contains("finished off the 
cake"));
     }
 
     /**
@@ -135,7 +135,7 @@ public class TikaCLITest {
     public void testMetadataOutput() throws Exception{
         String[] params = {"-m", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString("UTF-8").contains("text/plain"));
+        
assertTrue(outContent.toString(IOUtils.UTF_8.name()).contains("text/plain"));
     }
 
     /**
@@ -147,7 +147,7 @@ public class TikaCLITest {
     public void testJsonMetadataOutput() throws Exception {
         String[] params = {"--json", resourcePrefix + 
"testJsonMultipleInts.html"};
         TikaCLI.main(params);
-        String json = outContent.toString("UTF-8");
+        String json = outContent.toString(IOUtils.UTF_8.name());
         //TIKA-1310
         assertTrue(json.contains("\"fb:admins\":\"1,2,3,4\","));
         
@@ -168,7 +168,7 @@ public class TikaCLITest {
     public void testJsonMetadataPrettyPrintOutput() throws Exception {
         String[] params = {"--json", "-r", resourcePrefix + 
"testJsonMultipleInts.html"};
         TikaCLI.main(params);
-        String json = outContent.toString("UTF-8");
+        String json = outContent.toString(IOUtils.UTF_8.name());
 
         assertTrue(json.contains("  \"X-Parsed-By\": [\n" +
                 "    \"org.apache.tika.parser.DefaultParser\",\n" +
@@ -191,7 +191,7 @@ public class TikaCLITest {
     public void testLanguageOutput() throws Exception{
         String[] params = {"-l", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString("UTF-8").contains("en"));
+        assertTrue(outContent.toString(IOUtils.UTF_8.name()).contains("en"));
     }
 
     /**
@@ -203,7 +203,7 @@ public class TikaCLITest {
     public void testDetectOutput() throws Exception{
         String[] params = {"-d", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString("UTF-8").contains("text/plain"));
+        
assertTrue(outContent.toString(IOUtils.UTF_8.name()).contains("text/plain"));
     }
 
     /**
@@ -215,7 +215,7 @@ public class TikaCLITest {
     public void testListMetModels() throws Exception{
         String[] params = {"--list-met-models", resourcePrefix + 
"alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString("UTF-8").contains("text/plain"));
+        
assertTrue(outContent.toString(IOUtils.UTF_8.name()).contains("text/plain"));
     }
 
     /**
@@ -227,7 +227,7 @@ public class TikaCLITest {
     public void testListSupportedTypes() throws Exception{
         String[] params = {"--list-supported-types", resourcePrefix + 
"alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString("UTF-8").contains("supertype: 
application/octet-stream"));
+        
assertTrue(outContent.toString(IOUtils.UTF_8.name()).contains("supertype: 
application/octet-stream"));
     }
 
     /**
@@ -300,7 +300,7 @@ public class TikaCLITest {
     public void testMultiValuedMetadata() throws Exception {
         String[] params = {"-m", resourcePrefix + 
"testMultipleSheets.numbers"};
         TikaCLI.main(params);
-        String content = outContent.toString("UTF-8");
+        String content = outContent.toString(IOUtils.UTF_8.name());
         assertTrue(content.contains("sheetNames: Checking"));
         assertTrue(content.contains("sheetNames: Secon sheet"));
         assertTrue(content.contains("sheetNames: Logical Sheet 3"));
@@ -314,7 +314,7 @@ public class TikaCLITest {
         new File("subdir/foo.txt").delete();
         new File("subdir").delete();
         TikaCLI.main(params);
-        String content = outContent.toString("UTF-8");
+        String content = outContent.toString(IOUtils.UTF_8.name());
         assertTrue(content.contains("Extracting 'subdir/foo.txt'"));
         // clean up. TODO: These should be in target.
         new File("target/subdir/foo.txt").delete();
@@ -340,7 +340,7 @@ public class TikaCLITest {
     public void testConfig() throws Exception {
         String[] params = new 
String[]{"--config="+testDataFile.toString()+"/tika-config1.xml", 
resourcePrefix+"bad_xml.xml"};
         TikaCLI.main(params);
-        String content = outContent.toString("UTF-8");
+        String content = outContent.toString(IOUtils.UTF_8.name());
         assertTrue(content.contains("apple"));
         assertTrue(content.contains("org.apache.tika.parser.html.HtmlParser"));
     }
@@ -349,7 +349,7 @@ public class TikaCLITest {
     public void testJsonRecursiveMetadataParserMetadataOnly() throws Exception 
{
         String[] params = new String[]{"-m", "-J", "-r", 
resourcePrefix+"test_recursive_embedded.docx"};
         TikaCLI.main(params);
-        String content = outContent.toString("UTF-8");
+        String content = outContent.toString(IOUtils.UTF_8.name());
         assertTrue(content.contains("[\n" +
                 "  {\n" +
                 "    \"Application-Name\": \"Microsoft Office Word\",\n" +
@@ -365,7 +365,7 @@ public class TikaCLITest {
     public void testJsonRecursiveMetadataParserDefault() throws Exception {
         String[] params = new String[]{"-J", "-r", 
resourcePrefix+"test_recursive_embedded.docx"};
         TikaCLI.main(params);
-        String content = outContent.toString("UTF-8");
+        String content = outContent.toString(IOUtils.UTF_8.name());
         assertTrue(content.contains("\"X-TIKA:content\": \"\\u003chtml 
xmlns\\u003d\\\"http://www.w3.org/1999/xhtml"));
     }
 
@@ -373,7 +373,7 @@ public class TikaCLITest {
     public void testJsonRecursiveMetadataParserText() throws Exception {
         String[] params = new String[]{"-J", "-r", "-t", 
resourcePrefix+"test_recursive_embedded.docx"};
         TikaCLI.main(params);
-        String content = outContent.toString("UTF-8");
+        String content = outContent.toString(IOUtils.UTF_8.name());
         assertTrue(content.contains("\\n\\nembed_4\\n"));
         assertTrue(content.contains("\\n\\nembed_0"));
     }

Modified: 
tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java 
(original)
+++ tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java 
Fri Jan 23 19:55:51 2015
@@ -41,6 +41,7 @@ import org.apache.tika.config.TikaConfig
 import org.apache.tika.detect.DefaultDetector;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.fork.ForkParser;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
@@ -101,7 +102,7 @@ public class BundleIT {
         ForkParser parser = (ForkParser) 
bc.getService(bc.getServiceReference(ForkParser.class.getName()));
         ClassLoader classLoader = parser.getClass().getClassLoader();
         String data = "<!DOCTYPE html>\n<html><body><p>test 
<span>content</span></p></body></html>";
-        InputStream stream = new ByteArrayInputStream(data.getBytes("UTF-8"));
+        InputStream stream = new 
ByteArrayInputStream(data.getBytes(IOUtils.UTF_8));
         Writer writer = new StringWriter();
         ContentHandler contentHandler = new BodyContentHandler(writer);
         Metadata metadata = new Metadata();

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java 
Fri Jan 23 19:55:51 2015
@@ -29,6 +29,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
+import org.apache.tika.io.IOUtils;
 
 /**
  * Internal utility class that Tika uses to look up service providers.
@@ -329,7 +330,7 @@ public class ServiceLoader {
         InputStream stream = resource.openStream();
         try {
             BufferedReader reader =
-                new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+                new BufferedReader(new InputStreamReader(stream, 
IOUtils.UTF_8));
             String line = reader.readLine();
             while (line != null) {
                 line = COMMENT.matcher(line).replaceFirst("");

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java 
Fri Jan 23 19:55:51 2015
@@ -19,14 +19,13 @@ package org.apache.tika.detect;
 import java.io.CharArrayWriter;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 
@@ -99,11 +98,7 @@ public class MagicDetector implements De
         } else if (type.equals("stringignorecase")) {
             decoded = decodeString(value.toLowerCase(Locale.ROOT), type);
         } else if (type.equals("byte")) {
-            try {
-                decoded = tmpVal.getBytes("UTF-8");
-            } catch (UnsupportedEncodingException e) {
-                throw new AssertionError("UTF-8 not supported.");
-            }
+            decoded = tmpVal.getBytes(IOUtils.UTF_8);
         } else if (type.equals("host16") || type.equals("little16")) {
             int i = Integer.parseInt(tmpVal, radix);
             decoded = new byte[] { (byte) (i & 0x00FF), (byte) (i >> 8) };
@@ -399,7 +394,7 @@ public class MagicDetector implements De
                     flags = Pattern.CASE_INSENSITIVE;
                 }
                 
-                Pattern p = Pattern.compile(new String(this.pattern, "UTF-8"), 
flags);
+                Pattern p = Pattern.compile(new String(this.pattern, 
IOUtils.UTF_8), flags);
 
                 ByteBuffer bb = ByteBuffer.wrap(buffer);
                 CharBuffer result = ISO_8859_1.decode(bb);

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java 
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java 
Fri Jan 23 19:55:51 2015
@@ -22,6 +22,7 @@ import java.net.URLDecoder;
 import java.util.Map;
 import java.util.regex.Pattern;
 
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 
@@ -119,7 +120,7 @@ public class NameDetector implements Det
             int percent = name.indexOf('%');
             if (percent != -1) {
                 try {
-                    name = URLDecoder.decode(name, "UTF-8");
+                    name = URLDecoder.decode(name, IOUtils.UTF_8.name());
                 } catch (UnsupportedEncodingException e) {
                     throw new IllegalStateException("UTF-8 not supported", e);
                 }

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
 (original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
 Fri Jan 23 19:55:51 2015
@@ -413,7 +413,7 @@ public class ExternalEmbedder implements
             if (process.exitValue() != 0) {
                 throw new TikaException("There was an error executing the 
command line" +
                         "\nExecutable Command:\n\n" + cmd +
-                        "\nExecutable Error:\n\n" + 
stdErrOutputStream.toString("UTF-8"));
+                        "\nExecutable Error:\n\n" + 
stdErrOutputStream.toString(IOUtils.UTF_8.name()));
             }
         }
     }

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java 
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java Fri 
Jan 23 19:55:51 2015
@@ -263,7 +263,7 @@ class ForkClient {
             String manifest =
                 "Main-Class: " + ForkServer.class.getName() + "\n";
             jar.putNextEntry(new ZipEntry("META-INF/MANIFEST.MF"));
-            jar.write(manifest.getBytes("UTF-8"));
+            jar.write(manifest.getBytes(IOUtils.UTF_8));
 
             Class<?>[] bootstrap = {
                     ForkServer.class, ForkObjectInputStream.class,

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/IOUtils.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/IOUtils.java 
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/IOUtils.java Fri Jan 
23 19:55:51 2015
@@ -28,9 +28,9 @@ import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Reader;
 import java.io.StringWriter;
-import java.io.UnsupportedEncodingException;
 import java.io.Writer;
 import java.nio.channels.Channel;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -77,6 +77,9 @@ import java.util.List;
  */
 public class IOUtils {
 
+    //TODO: switch to StandardCharsets when we move to Java 1.7
+    public static final Charset UTF_8 = Charset.forName("UTF-8");
+
     /**
      * The default buffer size to use.
      */
@@ -255,7 +258,7 @@ public class IOUtils {
      */
     @Deprecated
     public static byte[] toByteArray(String input) throws IOException {
-        return input.getBytes("UTF-8");
+        return input.getBytes(IOUtils.UTF_8);
     }
 
     // read char[]
@@ -393,7 +396,7 @@ public class IOUtils {
      */
     @Deprecated
     public static String toString(byte[] input) throws IOException {
-        return new String(input, "UTF-8");
+        return new String(input, IOUtils.UTF_8);
     }
 
     /**
@@ -415,7 +418,7 @@ public class IOUtils {
             throws IOException {
         // If no encoding is specified, default to UTF-8.
         if (encoding == null) {
-            return new String(input, "UTF-8");
+            return new String(input, IOUtils.UTF_8);
         } else {
             return new String(input, encoding);
         }
@@ -437,7 +440,7 @@ public class IOUtils {
      * @since Commons IO 1.1
      */
     public static List<String> readLines(InputStream input) throws IOException 
{
-        InputStreamReader reader = new InputStreamReader(input, "UTF-8");
+        InputStreamReader reader = new InputStreamReader(input, IOUtils.UTF_8);
         return readLines(reader);
     }
 
@@ -531,13 +534,8 @@ public class IOUtils {
      * @since Commons IO 1.1
      */
     public static InputStream toInputStream(String input) {
-        try {
-            byte[] bytes = input.getBytes("UTF-8");
-            return new ByteArrayInputStream(bytes);
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
-
+        byte[] bytes = input.getBytes(IOUtils.UTF_8);
+        return new ByteArrayInputStream(bytes);
     }
 
     /**
@@ -554,7 +552,7 @@ public class IOUtils {
      * @since Commons IO 1.1
      */
     public static InputStream toInputStream(String input, String encoding) 
throws IOException {
-        byte[] bytes = encoding != null ? input.getBytes(encoding) : 
input.getBytes("UTF-8");
+        byte[] bytes = encoding != null ? input.getBytes(encoding) : 
input.getBytes(IOUtils.UTF_8);
         return new ByteArrayInputStream(bytes);
     }
 
@@ -592,7 +590,7 @@ public class IOUtils {
      */
     public static void write(byte[] data, Writer output) throws IOException {
         if (data != null) {
-            output.write(new String(data, "UTF-8"));
+            output.write(new String(data, IOUtils.UTF_8));
         }
     }
 
@@ -660,7 +658,7 @@ public class IOUtils {
     public static void write(char[] data, OutputStream output)
             throws IOException {
         if (data != null) {
-            output.write(new String(data).getBytes("UTF-8"));
+            output.write(new String(data).getBytes(IOUtils.UTF_8));
         }
     }
 
@@ -786,7 +784,7 @@ public class IOUtils {
     public static void write(String data, OutputStream output)
             throws IOException {
         if (data != null) {
-            output.write(data.getBytes("UTF-8"));
+            output.write(data.getBytes(IOUtils.UTF_8));
         }
     }
 
@@ -855,7 +853,7 @@ public class IOUtils {
     public static void write(StringBuffer data, OutputStream output)
             throws IOException {
         if (data != null) {
-            output.write(data.toString().getBytes("UTF-8"));
+            output.write(data.toString().getBytes(IOUtils.UTF_8));
         }
     }
 
@@ -961,7 +959,7 @@ public class IOUtils {
      */
     public static void copy(InputStream input, Writer output)
             throws IOException {
-        InputStreamReader in = new InputStreamReader(input, "UTF-8");
+        InputStreamReader in = new InputStreamReader(input, IOUtils.UTF_8);
         copy(in, output);
     }
 
@@ -1068,7 +1066,7 @@ public class IOUtils {
      */
     public static void copy(Reader input, OutputStream output)
             throws IOException {
-        OutputStreamWriter out = new OutputStreamWriter(output, "UTF-8");
+        OutputStreamWriter out = new OutputStreamWriter(output, IOUtils.UTF_8);
         copy(input, out);
         // XXX Unless anyone is planning on rewriting OutputStreamWriter, we
         // have to flush here.

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
 (original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
 Fri Jan 23 19:55:51 2015
@@ -25,6 +25,8 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
 
+import org.apache.tika.io.IOUtils;
+
 /**
  * Identifier of the language that best matches a given content profile.
  * The content profile is compared to generic language profiles based on
@@ -44,7 +46,6 @@ public class LanguageIdentifier {
     private static final Map<String, LanguageProfile> PROFILES =
         new HashMap<String, LanguageProfile>();
     private static final String PROFILE_SUFFIX = ".ngp";
-    private static final String PROFILE_ENCODING = "UTF-8";
 
     private static Properties props = new Properties();
     private static String errors = "";
@@ -76,7 +77,7 @@ public class LanguageIdentifier {
                 LanguageIdentifier.class.getResourceAsStream(language + PROFILE_SUFFIX);
             try {
                 BufferedReader reader =
-                    new BufferedReader(new InputStreamReader(stream, PROFILE_ENCODING));
+                    new BufferedReader(new InputStreamReader(stream, IOUtils.UTF_8));
                 String line = reader.readLine();
                 while (line != null) {
                     if (line.length() > 0 && !line.startsWith("#")) {

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
 (original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
 Fri Jan 23 19:55:51 2015
@@ -33,8 +33,9 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import org.apache.tika.exception.TikaException;
 
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 /**
  * This class runs a ngram analysis over submitted text, results might be used
  * for automatic language identification.
@@ -341,7 +342,7 @@ public class LanguageProfilerBuilder {
 
         ngrams.clear();
         ngramcounts = new int[maxLength + 1];
-        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+        BufferedReader reader = new BufferedReader(new InputStreamReader(is, IOUtils.UTF_8));
         String line = null;
 
         while ((line = reader.readLine()) != null) {
@@ -405,7 +406,7 @@ public class LanguageProfilerBuilder {
      */
     public void save(OutputStream os) throws IOException {
         os.write(("# NgramProfile generated at " + new Date() + 
-                  " for Apache Tika Language Identification\n").getBytes("UTF-8"));
+                  " for Apache Tika Language Identification\n").getBytes(IOUtils.UTF_8));
 
         // And then each ngram
 
@@ -432,7 +433,7 @@ public class LanguageProfilerBuilder {
         for (int i = 0; i < list.size(); i++) {
             NGramEntry e = list.get(i);
             String line = e.toString() + " " + e.getCount() + "\n";
-            os.write(line.getBytes("UTF-8"));
+            os.write(line.getBytes(IOUtils.UTF_8));
         }
         os.flush();
     }

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
 (original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
 Fri Jan 23 19:55:51 2015
@@ -24,7 +24,6 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.Reader;
-import java.io.UnsupportedEncodingException;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Map;
@@ -232,7 +231,7 @@ public class ExternalParser extends Abst
      */
     private void extractOutput(InputStream stream, XHTMLContentHandler xhtml)
             throws SAXException, IOException {
-        Reader reader = new InputStreamReader(stream, "UTF-8");
+        Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
         try {
             xhtml.startDocument();
             xhtml.startElement("p");
@@ -293,11 +292,7 @@ public class ExternalParser extends Abst
        new Thread() {
           public void run() {
              BufferedReader reader;
-             try {
-                 reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
-             } catch (UnsupportedEncodingException e) {
-                 throw new AssertionError("UTF-8 not supported.");
-             }
+              reader = new BufferedReader(new InputStreamReader(stream, IOUtils.UTF_8));
              try {
                 String line;
                 while ( (line = reader.readLine()) != null ) {

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java 
(original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java 
Fri Jan 23 19:55:51 2015
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
 
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.junit.Test;
@@ -54,8 +55,8 @@ public class TextDetectorTest {
 
     @Test
     public void testDetectText() throws Exception {
-        assertText("Hello, World!".getBytes("UTF-8"));
-        assertText(" \t\r\n".getBytes("UTF-8"));
+        assertText("Hello, World!".getBytes(IOUtils.UTF_8));
+        assertText(" \t\r\n".getBytes(IOUtils.UTF_8));
         assertNotText(new byte[] { -1, -2, -3, 0x09, 0x0A, 0x0C, 0x0D, 0x1B });
         assertNotText(new byte[] { 0 });
         assertNotText(new byte[] { 'H', 'e', 'l', 'l', 'o', 0 });

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java 
(original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java 
Fri Jan 23 19:55:51 2015
@@ -23,7 +23,6 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
 import java.util.Arrays;
 
 import org.junit.Test;
@@ -69,11 +68,7 @@ public class TailStreamTest
      */
     private static InputStream generateStream(int from, int length)
     {
-        try {
-            return new ByteArrayInputStream(generateText(from, length).getBytes("UTF-8"));
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        return new ByteArrayInputStream(generateText(from, length).getBytes(IOUtils.UTF_8));
     }
 
     /**
@@ -128,7 +123,7 @@ public class TailStreamTest
         TailStream stream = new TailStream(generateStream(0, 2 * count), count);
         readStream(stream);
         assertEquals("Wrong buffer", generateText(count, count), new String(
-                stream.getTail(), "UTF-8"));
+                stream.getTail(), IOUtils.UTF_8));
     }
 
     /**
@@ -149,7 +144,7 @@ public class TailStreamTest
             read = stream.read(buf);
         }
         assertEquals("Wrong buffer", generateText(count - tailSize, tailSize),
-                new String(stream.getTail(), "UTF-8"));
+                new String(stream.getTail(), IOUtils.UTF_8));
         stream.close();
     }
 
@@ -169,7 +164,7 @@ public class TailStreamTest
         stream.reset();
         readStream(stream);
         assertEquals("Wrong buffer", generateText(tailSize, tailSize),
-                new String(stream.getTail(), "UTF-8"));
+                new String(stream.getTail(), IOUtils.UTF_8));
     }
 
     /**
@@ -185,7 +180,7 @@ public class TailStreamTest
         byte[] buf = new byte[count];
         stream.read(buf);
         assertEquals("Wrong buffer", generateText(count - tailSize, tailSize),
-                new String(stream.getTail(), "UTF-8"));
+                new String(stream.getTail(), IOUtils.UTF_8));
         stream.close();
     }
 
@@ -202,7 +197,7 @@ public class TailStreamTest
         assertEquals("Wrong skip result", skipCount, stream.skip(skipCount));
         assertEquals("Wrong buffer",
                 generateText(skipCount - tailSize, tailSize),
-                new String(stream.getTail(), "UTF-8"));
+                new String(stream.getTail(), IOUtils.UTF_8));
         stream.close();
     }
 
@@ -216,7 +211,7 @@ public class TailStreamTest
         TailStream stream = new TailStream(generateStream(0, count), 2 * count);
         assertEquals("Wrong skip result", count, stream.skip(2 * count));
         assertEquals("Wrong buffer", generateText(0, count),
-                new String(stream.getTail(), "UTF-8"));
+                new String(stream.getTail(), IOUtils.UTF_8));
         stream.close();
     }
 

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java 
(original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java 
Fri Jan 23 19:55:51 2015
@@ -16,6 +16,10 @@
  */
 package org.apache.tika.io;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
@@ -27,11 +31,7 @@ import java.io.OutputStream;
 import java.net.URL;
 
 import org.apache.tika.metadata.Metadata;
-
 import org.junit.Test;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
 
 public class TikaInputStreamTest {
 
@@ -62,7 +62,7 @@ public class TikaInputStreamTest {
     @Test
     public void testStreamBased() throws IOException {
         InputStream input =
-            new ByteArrayInputStream("Hello, World!".getBytes("UTF-8"));
+            new ByteArrayInputStream("Hello, World!".getBytes(IOUtils.UTF_8));
         InputStream stream = TikaInputStream.get(input);
 
         File file = TikaInputStream.get(stream).getFile();
@@ -89,7 +89,7 @@ public class TikaInputStreamTest {
         File file = File.createTempFile("tika-", ".tmp");
         OutputStream stream = new FileOutputStream(file);
         try {
-            stream.write(data.getBytes("UTF-8"));
+            stream.write(data.getBytes(IOUtils.UTF_8));
         } finally {
             stream.close();
         }
@@ -108,7 +108,7 @@ public class TikaInputStreamTest {
     private String readStream(InputStream stream) throws IOException {
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
         IOUtils.copy(stream, buffer);
-        return buffer.toString("UTF-8");
+        return buffer.toString(IOUtils.UTF_8.name());
     }
 
     @Test

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java
 (original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java
 Fri Jan 23 19:55:51 2015
@@ -16,16 +16,16 @@
  */
 package org.apache.tika.language;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Writer;
 import java.util.HashMap;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
 import org.apache.tika.io.IOUtils;
 import org.junit.Before;
 import org.junit.Test;
@@ -139,7 +139,7 @@ public class LanguageIdentifierTest {
         InputStream stream =
             LanguageIdentifierTest.class.getResourceAsStream(language + ".test");
         try {
-            IOUtils.copy(new InputStreamReader(stream, "UTF-8"), writer);
+            IOUtils.copy(new InputStreamReader(stream, IOUtils.UTF_8), writer);
         } finally {
             stream.close();
         }

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfilerBuilderTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfilerBuilderTest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfilerBuilderTest.java
 (original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfilerBuilderTest.java
 Fri Jan 23 19:55:51 2015
@@ -17,6 +17,9 @@
 
 package org.apache.tika.language;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
@@ -27,12 +30,10 @@ import java.io.InputStreamReader;
 import java.net.URISyntaxException;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.junit.After;
 import org.junit.Test;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
 public class LanguageProfilerBuilderTest {
     /* Test members */
     private LanguageProfilerBuilder ngramProfile = null;
@@ -40,7 +41,6 @@ public class LanguageProfilerBuilderTest
     private final String profileName = "../tika-core/src/test/resources/org/apache/tika/language/langbuilder/"
             + LanguageProfilerBuilderTest.class.getName();
     private final String corpusName = "langbuilder/welsh_corpus.txt";
-    private final String encoding = "UTF-8";
     private final String FILE_EXTENSION = "ngp";
     private final String LANGUAGE = "welsh";
     private final int maxlen = 1000;
@@ -50,7 +50,7 @@ public class LanguageProfilerBuilderTest
         InputStream is =
                 LanguageProfilerBuilderTest.class.getResourceAsStream(corpusName);
         try {
-            ngramProfile = LanguageProfilerBuilder.create(profileName, is , encoding);
+            ngramProfile = LanguageProfilerBuilder.create(profileName, is , IOUtils.UTF_8.name());
         } finally {
             is.close();
         }
@@ -82,7 +82,7 @@ public class LanguageProfilerBuilderTest
                 + FILE_EXTENSION));
         try {
             BufferedReader reader = new BufferedReader(new InputStreamReader(
-                    stream, encoding));
+                    stream, IOUtils.UTF_8));
             String line = reader.readLine();
             while (line != null) {
                 if (line.length() > 0 && !line.startsWith("#")) {// skips the

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java 
(original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java 
Fri Jan 23 19:55:51 2015
@@ -25,8 +25,8 @@ import java.io.InputStream;
 import java.net.URL;
 
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
-
 import org.junit.Before;
 import org.junit.Test;
 
@@ -85,7 +85,7 @@ public class MimeDetectionTest {
                 new ByteArrayInputStream("\ufefftest".getBytes("UTF-16BE")),
                 new Metadata()));
         assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
-                new ByteArrayInputStream("\ufefftest".getBytes("UTF-8")),
+                new ByteArrayInputStream("\ufefftest".getBytes(IOUtils.UTF_8)),
                 new Metadata()));
     }
 
@@ -195,7 +195,7 @@ public class MimeDetectionTest {
     @Test
     public void testNotXML() throws IOException {
         assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
-                new ByteArrayInputStream("<!-- test -->".getBytes("UTF-8")),
+                new ByteArrayInputStream("<!-- test -->".getBytes(IOUtils.UTF_8)),
                 new Metadata()));
     }
 
@@ -219,7 +219,7 @@ public class MimeDetectionTest {
      */
     @Test    
     public void testMimeMagicClashSamePriority() throws IOException {
-        byte[] helloWorld = "Hello, World!".getBytes("UTF-8");
+        byte[] helloWorld = "Hello, World!".getBytes(IOUtils.UTF_8);
         MediaType helloType = MediaType.parse("hello/world-file");
         MediaType helloXType = MediaType.parse("hello/x-world-hello");
         Metadata metadata;

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
 (original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
 Fri Jan 23 19:55:51 2015
@@ -16,6 +16,10 @@
  */
 package org.apache.tika.sax;
 
+import static junit.framework.Assert.assertFalse;
+import static junit.framework.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
+
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -23,6 +27,7 @@ import java.io.UnsupportedEncodingExcept
 import java.util.Set;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
@@ -34,15 +39,12 @@ import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
 import org.xml.sax.helpers.DefaultHandler;
 
-import static junit.framework.Assert.assertFalse;
-import static junit.framework.Assert.assertTrue;
-import static org.junit.Assert.assertEquals;
-
 /**
  * Test cases for the {@link org.apache.tika.sax.BodyContentHandler} class.
  */
 public class BasicContentHandlerFactoryTest {
-    private static final String ENCODING = "UTF-8";
+
+    private static final String ENCODING = IOUtils.UTF_8.name();
     //default max char len (at least in WriteOutContentHandler is 100k)
     private static final int OVER_DEFAULT = 120000;
 

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
 (original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
 Fri Jan 23 19:55:51 2015
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertEqu
 import java.io.ByteArrayOutputStream;
 import java.io.OutputStream;
 
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.junit.Test;
 
@@ -45,7 +46,7 @@ public class BodyContentHandlerTest {
         xhtml.element("p", "Test text");
         xhtml.endDocument();
 
-        assertEquals("Test text\n", buffer.toString("UTF-8"));
+        assertEquals("Test text\n", buffer.toString(IOUtils.UTF_8.name()));
     }
 
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
 Fri Jan 23 19:55:51 2015
@@ -16,20 +16,6 @@ package org.apache.tika.example;
  * limitations under the License.
  */
 
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.detect.DefaultDetector;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.language.translate.DefaultTranslator;
-import org.apache.tika.language.translate.Translator;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.CompositeParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Node;
-
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.transform.OutputKeys;
@@ -37,18 +23,35 @@ import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
+
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.StringWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.language.translate.DefaultTranslator;
+import org.apache.tika.language.translate.Translator;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+
 
 /**
  * This class shows how to dump a TikaConfig object to a configuration file.
@@ -187,19 +190,19 @@ public class DumpTikaConfigExample {
      */
     public static void main(String[] args) throws Exception {
 
-        String encoding = "UTF-8";
+        Charset encoding = IOUtils.UTF_8;
         Writer writer = null;
         if (args.length > 0) {
-            writer = new OutputStreamWriter(new FileOutputStream(new File(args[0])));
+            writer = new OutputStreamWriter(new FileOutputStream(new File(args[0])), encoding);
         } else {
             writer = new StringWriter();
         }
 
         if (args.length > 1) {
-            encoding = args[1];
+            encoding = Charset.forName(args[1]);
         }
         DumpTikaConfigExample ex = new DumpTikaConfigExample();
-        ex.dump(TikaConfig.getDefaultConfig(), writer, encoding);
+        ex.dump(TikaConfig.getDefaultConfig(), writer, encoding.name());
 
         writer.flush();
 

Modified: tika/trunk/tika-parent/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parent/pom.xml?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- tika/trunk/tika-parent/pom.xml (original)
+++ tika/trunk/tika-parent/pom.xml Fri Jan 23 19:55:51 2015
@@ -213,6 +213,14 @@
         <role>committer</role>
       </roles>
     </developer>
+    <developer>
+      <name>Tim Allison</name>
+      <id>tallison</id>
+      <timezone>-5</timezone>
+      <roles>
+        <role>committer</role>
+      </roles>
+    </developer>
   </developers>
   <contributors>
     <contributor>
@@ -274,7 +282,6 @@
   </properties>
 
   <build>
-    <pluginManagement>
       <plugins>
         <plugin>
           <artifactId>maven-compiler-plugin</artifactId>
@@ -287,7 +294,7 @@
         <plugin>
           <groupId>de.thetaphi</groupId>
           <artifactId>forbiddenapis</artifactId>
-          <version>1.6.1</version>
+          <version>1.7</version>
           <configuration>
             <targetVersion>${maven.compiler.target}</targetVersion>
             <internalRuntimeForbidden>true</internalRuntimeForbidden>
@@ -321,6 +328,36 @@
           <artifactId>maven-shade-plugin</artifactId>
           <version>2.3</version>
         </plugin>
+    </plugins>
+
+    <pluginManagement>
+      <plugins>
+        <!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
+        <plugin>
+          <groupId>org.eclipse.m2e</groupId>
+          <artifactId>lifecycle-mapping</artifactId>
+          <version>1.0.0</version>
+          <configuration>
+            <lifecycleMappingMetadata>
+              <pluginExecutions>
+                <pluginExecution>
+                  <pluginExecutionFilter>
+                    <groupId>de.thetaphi</groupId>
+                    <artifactId>forbiddenapis</artifactId>
+                    <versionRange>[1.0,)</versionRange>
+                    <goals>
+                      <goal>check</goal>
+                      <goal>testCheck</goal>
+                    </goals>
+                  </pluginExecutionFilter>
+                  <action>
+                    <ignore/>
+                  </action>
+                </pluginExecution>
+              </pluginExecutions>
+            </lifecycleMappingMetadata>
+          </configuration>
+        </plugin>
       </plugins>
     </pluginManagement>
   </build>

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
 Fri Jan 23 19:55:51 2015
@@ -16,11 +16,11 @@
  */
 package org.apache.tika.parser.chm.accessor;
 
-import java.io.UnsupportedEncodingException;
 import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.chm.core.ChmCommons;
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
@@ -232,13 +232,10 @@ public class ChmDirectoryListingSet {
                     
                     DirectoryListingEntry dle = new DirectoryListingEntry();
                     dle.setNameLength(strlen);
-                    try {
-                        dle.setName(new String(ChmCommons.copyOfRange(
+                    dle.setName(new String(ChmCommons.copyOfRange(
                                 dir_chunk, placeHolder,
-                                (placeHolder + dle.getNameLength())), "UTF-8"));
-                    } catch (UnsupportedEncodingException ex) {
-                        dle.setName(new String(dir_chunk, placeHolder, placeHolder + dle.getNameLength()));
-                    }
+                                (placeHolder + dle.getNameLength())), IOUtils.UTF_8));
+
                     checkControlData(dle);
                     checkResetTable(dle);
                     setPlaceHolder(placeHolder

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
 Fri Jan 23 19:55:51 2015
@@ -16,10 +16,10 @@
  */
 package org.apache.tika.parser.chm.accessor;
 
-import java.io.UnsupportedEncodingException;
 import java.math.BigInteger;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.chm.assertion.ChmAssert;
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
@@ -62,11 +62,7 @@ public class ChmItsfHeader implements Ch
     private int currentPlace = 0;
 
     public ChmItsfHeader() {
-        try {
-            signature = ChmConstants.ITSF.getBytes("UTF-8"); /* 0 (ITSF) */
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        signature = ChmConstants.ITSF.getBytes(IOUtils.UTF_8); /* 0 (ITSF) */
     }
 
     /**
@@ -74,11 +70,7 @@ public class ChmItsfHeader implements Ch
      */
     public String toString() {
         StringBuilder sb = new StringBuilder();
-        try {
-            sb.append(new String(getSignature(), "UTF-8") + " ");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        sb.append(new String(getSignature(), IOUtils.UTF_8) + " ");
         sb.append(getVersion() + " ");
         sb.append(getHeaderLen() + " ");
         sb.append(getUnknown_000c() + " ");
@@ -471,12 +463,8 @@ public class ChmItsfHeader implements Ch
         chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
         chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
         chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));
-        try {
-            if (!new String(chmItsfHeader.getSignature(), "UTF-8").equals(ChmConstants.ITSF))
-                throw new TikaException("seems not valid file");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        if (!new String(chmItsfHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.ITSF))
+            throw new TikaException("seems not valid file");
         if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
             if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
                 throw new TikaException("something wrong with header");

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
 Fri Jan 23 19:55:51 2015
@@ -16,14 +16,15 @@
  */
 package org.apache.tika.parser.chm.accessor;
 
+import java.io.UnsupportedEncodingException;
+
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.chm.assertion.ChmAssert;
 import org.apache.tika.parser.chm.core.ChmCommons;
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
 
-import java.io.UnsupportedEncodingException;
-
 /**
  * Directory header The directory starts with a header; its format is as
  * follows: 0000: char[4] 'ITSP' 0004: DWORD Version number 1 0008: DWORD Length
@@ -68,25 +69,17 @@ public class ChmItspHeader implements Ch
     private int currentPlace = 0;
 
     public ChmItspHeader() {
-        try {
-            signature = ChmConstants.ITSP.getBytes("UTF-8"); /*
+        signature = ChmConstants.ITSP.getBytes(IOUtils.UTF_8); /*
                                                                           * 0
                                                                           * 
(ITSP
                                                                           * )
                                                                           */
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
     }
 
     public String toString() {
         StringBuilder sb = new StringBuilder();
-        try {
-            sb.append("[ signature:=" + new String(getSignature(), "UTF-8")
-                    + System.getProperty("line.separator"));
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        sb.append("[ signature:=" + new String(getSignature(), IOUtils.UTF_8)
+                + System.getProperty("line.separator"));
         sb.append("version:=\t" + getVersion()
                 + System.getProperty("line.separator"));
         sb.append("header_len:=\t" + getHeader_len()
@@ -544,12 +537,9 @@ public class ChmItspHeader implements Ch
                         ChmConstants.BYTE_ARRAY_LENGHT));
 
         /* Checks validity of the itsp header */
-        try {
-            if (!new String(chmItspHeader.getSignature(), "UTF-8").equals(ChmConstants.ITSP))
+        if (!new String(chmItspHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.ITSP))
                 throw new ChmParsingException("seems not valid signature");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+
         if (chmItspHeader.getVersion() != ChmConstants.CHM_VER_1)
             throw new ChmParsingException("!=ChmConstants.CHM_VER_1");
 

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
 Fri Jan 23 19:55:51 2015
@@ -16,13 +16,14 @@
  */
 package org.apache.tika.parser.chm.accessor;
 
+import java.io.UnsupportedEncodingException;
+
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.chm.assertion.ChmAssert;
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
 
-import java.io.UnsupportedEncodingException;
-
 /**
  * 
  * ::DataSpace/Storage/<SectionName>/ControlData This file contains $20 bytes of
@@ -54,15 +55,11 @@ public class ChmLzxcControlData implemen
     private int currentPlace = 0;
 
     public ChmLzxcControlData() {
-        try {
-            signature = ChmConstants.LZXC.getBytes("UTF-8"); /*
+        signature = ChmConstants.LZXC.getBytes(IOUtils.UTF_8); /*
                                                               * 4
                                                               * (LZXC
                                                               * )
                                                               */
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
     }
 
     /**
@@ -257,12 +254,8 @@ public class ChmLzxcControlData implemen
     public String toString() {
         StringBuilder sb = new StringBuilder();
         sb.append("size(unknown):=" + this.getSize() + ", ");
-        try {
-            sb.append("signature(Compression type identifier):="
-                    + new String(this.getSignature(), "UTF-8") + ", ");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        sb.append("signature(Compression type identifier):="
+                + new String(this.getSignature(), IOUtils.UTF_8) + ", ");
         sb.append("version(Possibly numeric code for LZX):="
                 + this.getVersion() + System.getProperty("line.separator"));
         sb.append("resetInterval(The Huffman reset interval):="
@@ -313,14 +306,10 @@ public class ChmLzxcControlData implemen
                     "window size / resetInterval should be more than 1");
 
         /* checks a signature */
-        try {
-            if (!new String(chmLzxcControlData.getSignature(), "UTF-8")
-                    .equals(ChmConstants.LZXC))
-                throw new ChmParsingException(
-                        "the signature does not seem to be correct");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        if (!new String(chmLzxcControlData.getSignature(), IOUtils.UTF_8)
+                .equals(ChmConstants.LZXC))
+            throw new ChmParsingException(
+                    "the signature does not seem to be correct");
     }
 
     /**

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
 Fri Jan 23 19:55:51 2015
@@ -16,10 +16,10 @@
  */
 package org.apache.tika.parser.chm.accessor;
 
-import java.io.UnsupportedEncodingException;
 import java.util.Arrays;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.chm.assertion.ChmAssert;
 import org.apache.tika.parser.chm.core.ChmCommons;
 import org.apache.tika.parser.chm.core.ChmConstants;
@@ -54,11 +54,7 @@ public class ChmPmgiHeader implements Ch
     private int currentPlace = 0;
 
     public ChmPmgiHeader() {
-        try {
-            signature = ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8"); /* 0 (PMGI) */
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        signature = ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8); /* 0 (PMGI) */
     }
 
     private int getDataRemained() {
@@ -84,12 +80,9 @@ public class ChmPmgiHeader implements Ch
         ChmAssert.assertChmAccessorNotNull(chmPmgiHeader);
         ChmAssert.assertPositiveInt(count);
         this.setDataRemained(data.length);
-        try {
             index = ChmCommons.indexOf(data,
-                    ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8"));
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+                    ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8));
+
         if (index >= 0)
             System.arraycopy(data, index, chmPmgiHeader.getSignature(), 0, count);
         else{
@@ -156,11 +149,7 @@ public class ChmPmgiHeader implements Ch
      */
     public String toString() {
         StringBuilder sb = new StringBuilder();
-        try {
-            sb.append("signature:=" + new String(getSignature(), "UTF-8") + ", ");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        sb.append("signature:=" + new String(getSignature(), IOUtils.UTF_8) + ", ");
         sb.append("free space:=" + getFreeSpace()
                 + System.getProperty("line.separator"));
         return sb.toString();
@@ -177,14 +166,10 @@ public class ChmPmgiHeader implements Ch
         chmPmgiHeader.setFreeSpace(chmPmgiHeader.unmarshalUInt32(data, chmPmgiHeader.getFreeSpace()));
 
         /* check structure */
-        try {
-            if (!Arrays.equals(chmPmgiHeader.getSignature(),
-                    ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8")))
-                throw new TikaException(
-                        "it does not seem to be valid a PMGI signature, check ChmItsp index_root if it was -1, means no PMGI, use PMGL insted");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        if (!Arrays.equals(chmPmgiHeader.getSignature(),
+                ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8)))
+            throw new TikaException(
+                    "it does not seem to be valid a PMGI signature, check ChmItsp index_root if it was -1, means no PMGI, use PMGL insted");
 
     }
 }

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
 Fri Jan 23 19:55:51 2015
@@ -16,9 +16,8 @@
  */
 package org.apache.tika.parser.chm.accessor;
 
-import java.io.UnsupportedEncodingException;
-
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.chm.assertion.ChmAssert;
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
@@ -68,15 +67,11 @@ public class ChmPmglHeader implements Ch
     private int currentPlace = 0;
 
     public ChmPmglHeader() {
-        try {
-            signature = ChmConstants.PMGL.getBytes("UTF-8"); /*
+            signature = ChmConstants.PMGL.getBytes(IOUtils.UTF_8); /*
                                                                           * 0
                                                                           * 
(PMGL
                                                                           * )
                                                                           */
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
     }
 
     private int getDataRemained() {
@@ -108,11 +103,7 @@ public class ChmPmglHeader implements Ch
 
     public String toString() {
         StringBuilder sb = new StringBuilder();
-        try {
-            sb.append("signatute:=" + new String(getSignature(), "UTF-8") + ", ");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        sb.append("signatute:=" + new String(getSignature(), IOUtils.UTF_8) + ", ");
         sb.append("free space:=" + getFreeSpace() + ", ");
         sb.append("unknown0008:=" + getUnknown0008() + ", ");
         sb.append("prev block:=" + getBlockPrev() + ", ");
@@ -175,13 +166,9 @@ public class ChmPmglHeader implements Ch
         chmPmglHeader.setBlockNext(chmPmglHeader.unmarshalInt32(data));
 
         /* check structure */
-        try {
-            if (!new String(chmPmglHeader.getSignature(), "UTF-8").equals(ChmConstants.PMGL))
-                throw new ChmParsingException(ChmPmglHeader.class.getName()
-                        + " pmgl != pmgl.signature");
-        } catch (UnsupportedEncodingException e) {
-            throw new AssertionError("UTF-8 not supported.");
-        }
+        if (!new String(chmPmglHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.PMGL))
+            throw new ChmParsingException(ChmPmglHeader.class.getName()
+                    + " pmgl != pmgl.signature");
     }
 
     public byte[] getSignature() {

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
 Fri Jan 23 19:55:51 2015
@@ -16,12 +16,14 @@
  */
 package org.apache.tika.parser.chm.core;
 
+import org.apache.tika.io.IOUtils;
+
 public class ChmConstants {
     /* Prevents instantiation */
     private ChmConstants() {
     }
 
-    public static final String DEFAULT_CHARSET = "UTF-8";
+    public static final String DEFAULT_CHARSET = IOUtils.UTF_8.name();
     public static final String ITSF = "ITSF";
     public static final String ITSP = "ITSP";
     public static final String PMGL = "PMGL";

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
 Fri Jan 23 19:55:51 2015
@@ -172,7 +172,7 @@ public class ChmExtractor {
 
             int indexOfControlData = getChmDirList().getControlDataIndex();
             int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
-                    ChmConstants.LZXC.getBytes("UTF-8"));
+                    ChmConstants.LZXC.getBytes(IOUtils.UTF_8));
             byte[] dir_chunk = null;
             if (indexOfResetData > 0)
                 dir_chunk = ChmCommons.copyOfRange( getData(), 
indexOfResetData, indexOfResetData  

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
 Fri Jan 23 19:55:51 2015
@@ -93,7 +93,7 @@ public class EpubParser extends Abstract
         ZipEntry entry = zip.getNextEntry();
         while (entry != null) {
             if (entry.getName().equals("mimetype")) {
-                String type = IOUtils.toString(zip, "UTF-8");
+                String type = IOUtils.toString(zip, IOUtils.UTF_8.name());
                 metadata.set(Metadata.CONTENT_TYPE, type);
             } else if (entry.getName().equals("metadata.xml")) {
                 meta.parse(zip, new DefaultHandler(), metadata, context);

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
 Fri Jan 23 19:55:51 2015
@@ -30,9 +30,8 @@ import java.util.Scanner;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-
-//Tika imports
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -41,12 +40,13 @@ import org.apache.tika.parser.AbstractPa
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.external.ExternalParser;
 import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
 
 import static org.apache.tika.parser.external.ExternalParser.INPUT_FILE_TOKEN;
 
+//Tika imports
 //SAX imports
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
 
 /**
  * Wraps execution of the <a href="http//gdal.org/">Geospatial Data Abstraction
@@ -385,7 +385,7 @@ public class GDALParser extends Abstract
     private String extractOutput(InputStream stream) throws SAXException,
             IOException {
         StringBuffer sb = new StringBuffer();
-        Reader reader = new InputStreamReader(stream, "UTF-8");
+        Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
         try {
             char[] buffer = new char[1024];
             for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
@@ -400,8 +400,8 @@ public class GDALParser extends Abstract
     private void processOutput(ContentHandler handler, Metadata metadata,
                                String output) throws SAXException, IOException {
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        InputStream stream = new ByteArrayInputStream(output.getBytes("UTF-8"));
-        Reader reader = new InputStreamReader(stream, "UTF-8");
+        InputStream stream = new ByteArrayInputStream(output.getBytes(IOUtils.UTF_8));
+        Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
         try {
             xhtml.startDocument();
             xhtml.startElement("p");

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
 Fri Jan 23 19:55:51 2015
@@ -301,7 +301,7 @@ public class ImageMetadataExtractor {
             @Override
             protected SimpleDateFormat initialValue()
             {
-               return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+               return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US);
             }
         };
 

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
 Fri Jan 23 19:55:51 2015
@@ -22,12 +22,12 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
-import java.util.Iterator;
 import java.util.List;
 
 import org.apache.jempbox.xmp.XMPMetadata;
 import org.apache.jempbox.xmp.XMPSchemaDublinCore;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.xml.sax.InputSource;
@@ -39,7 +39,7 @@ public class JempboxExtractor {
     private Metadata metadata;
     
     // The XMP spec says it must be unicode, but for most file formats it specifies "must be encoded in UTF-8"
-    private static final String DEFAULT_XMP_CHARSET = "UTF-8";
+    private static final String DEFAULT_XMP_CHARSET = IOUtils.UTF_8.name();
 
     public JempboxExtractor(Metadata metadata) {
         this.metadata = metadata;

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
 Fri Jan 23 19:55:51 2015
@@ -28,6 +28,7 @@ import java.util.Set;
 import java.util.TimeZone;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
@@ -161,7 +162,7 @@ public class IptcAnpaParser implements P
          }
          int msgsize = is.read(buf);                // read in at least the full data
 
-         String message = (new String(buf, "UTF-8")).toLowerCase(Locale.ROOT);
+         String message = (new String(buf, IOUtils.UTF_8)).toLowerCase(Locale.ROOT);
          // these are not if-then-else, because we want to go from most common
          // and fall through to least.  this is imperfect, as these tags could
          // show up in other agency stories, but i can't find a spec or any

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java?rev=1654351&r1=1654350&r2=1654351&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java 
(original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java 
Fri Jan 23 19:55:51 2015
@@ -24,6 +24,7 @@ import java.util.Set;
 import java.util.Map;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AbstractParser;
@@ -86,7 +87,7 @@ public class MatParser extends AbstractP
             }
 
             // Get endian indicator from header file
-            String endianBytes = new String(hdr.getEndianIndicator(), "UTF-8"); // Retrieve endian bytes and convert to string
+            String endianBytes = new String(hdr.getEndianIndicator(), IOUtils.UTF_8); // Retrieve endian bytes and convert to string
             String endianCode = String.valueOf(endianBytes.toCharArray()); // Convert bytes to characters to string
             metadata.set("endian", endianCode);
 


Reply via email to