Author: mattmann
Date: Sun Aug 31 19:36:36 2014
New Revision: 1621623

URL: http://svn.apache.org/r1621623
Log:
Bring 1.6 branch up to date with trunk in prep for 1.6 RC #2.

Added:
    tika/branches/1.6/tika-example/
      - copied from r1621617, tika/trunk/tika-example/
    
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testComment.xls
      - copied unchanged from r1621617, 
tika/trunk/tika-parsers/src/test/resources/test-documents/testComment.xls
    
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testComment.xlsx
      - copied unchanged from r1621617, 
tika/trunk/tika-parsers/src/test/resources/test-documents/testComment.xlsx
    
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testTXT-tika.axx
      - copied unchanged from r1621617, 
tika/trunk/tika-parsers/src/test/resources/test-documents/testTXT-tika.axx
    
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaLoggingFilter.java
      - copied unchanged from r1621617, 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaLoggingFilter.java
    
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
      - copied unchanged from r1621617, 
tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
    
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
      - copied unchanged from r1621617, 
tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
    
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
      - copied unchanged from r1621617, 
tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
    
tika/branches/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
      - copied unchanged from r1621617, 
tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
    
tika/branches/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
      - copied unchanged from r1621617, 
tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
    
tika/branches/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java
      - copied unchanged from r1621617, 
tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java
    
tika/branches/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/MosesTranslatorTest.java
      - copied unchanged from r1621617, 
tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/MosesTranslatorTest.java
Removed:
    tika/branches/1.6/src/
Modified:
    tika/branches/1.6/   (props changed)
    tika/branches/1.6/CHANGES.txt
    tika/branches/1.6/pom.xml
    tika/branches/1.6/tika-app/pom.xml
    tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
    
tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
    tika/branches/1.6/tika-bundle/pom.xml
    tika/branches/1.6/tika-core/pom.xml
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
    tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
    
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java
    
tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
    
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
    
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
    
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
    tika/branches/1.6/tika-example/pom.xml
    tika/branches/1.6/tika-java7/pom.xml
    tika/branches/1.6/tika-parent/pom.xml
    tika/branches/1.6/tika-parsers/pom.xml
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
    
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
    tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TikaTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmBlockInfo.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
    
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testMP3id3v2.mp3
    
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testMP3id3v24.mp3
    tika/branches/1.6/tika-serialization/   (props changed)
    tika/branches/1.6/tika-serialization/pom.xml
    
tika/branches/1.6/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
    tika/branches/1.6/tika-server/pom.xml
    
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaMimeTypes.java
    
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
    
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
    
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java
    
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
    
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
    
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java
    
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
    
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java
    
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/TikaWelcomeTest.java
    tika/branches/1.6/tika-translate/   (props changed)
    tika/branches/1.6/tika-translate/pom.xml
    
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
    
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/MicrosoftTranslator.java
    
tika/branches/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/CachedTranslatorTest.java
    
tika/branches/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java
    tika/branches/1.6/tika-xmp/pom.xml
    
tika/branches/1.6/tika-xmp/src/test/java/org/apache/tika/xmp/XMPMetadataTest.java

Propchange: tika/branches/1.6/
------------------------------------------------------------------------------
  Merged 
/tika/trunk:r1613865-1615129,1615131-1615173,1615175-1615623,1615625-1615630,1615632-1619107,1619109-1621617

Modified: tika/branches/1.6/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/CHANGES.txt?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/CHANGES.txt (original)
+++ tika/branches/1.6/CHANGES.txt Sun Aug 31 19:36:36 2014
@@ -1,4 +1,19 @@
-Release 1.6 - 07/27/2014
+Release 1.6 - 08/31/2014
+
+  * Parse output should indicate which Parser was actually used
+    (TIKA-674).
+
+  * Use the forbidden-apis Maven plugin to check for unsafe Java
+    operations (TIKA-1387).
+
+  * Created an ExternalTranslator class to interface with command
+    line Translators (TIKA-1385).
+
+  * Created a MosesTranslator as a subclass of ExternalTranslator
+    that calls the Moses Decoder machine translation program (TIKA-1385).
+
+  * Created the tika-example module. It will have examples of how to
+    use the main Tika interfaces (TIKA-1390).
 
   * Upgraded to Commons Compress 1.8.1 (TIKA-1275).
 

Modified: tika/branches/1.6/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/pom.xml (original)
+++ tika/branches/1.6/pom.xml Sun Aug 31 19:36:36 2014
@@ -36,12 +36,12 @@
 
   <scm>
     <connection>
-      scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6
+      scm:svn:http://svn.apache.org/repos/asf/tika/trunk
     </connection>
     <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6
+      scm:svn:https://svn.apache.org/repos/asf/tika/trunk
     </developerConnection>
-    <url>http://svn.apache.org/viewvc/tika/tags/1.6</url>
+    <url>http://svn.apache.org/viewvc/tika/trunk</url>
   </scm>
 
   <modules>
@@ -54,42 +54,9 @@
     <module>tika-bundle</module>
     <module>tika-server</module>
     <module>tika-translate</module>
+    <module>tika-example</module>
   </modules>
 
-  <build>
-    <plugins>
-      <plugin>
-        <artifactId>maven-deploy-plugin</artifactId>
-        <configuration>
-          <skip>true</skip> <!-- No need to deploy the reactor -->
-        </configuration>
-      </plugin>
-       <plugin>
-        <artifactId>maven-site-plugin</artifactId>
-        <configuration>
-          <templateDirectory>src/site</templateDirectory>
-          <template>site.vm</template>
-        </configuration>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.rat</groupId>
-        <artifactId>apache-rat-plugin</artifactId>
-        <configuration>
-          <excludes>
-            <exclude>.*/**</exclude>
-            <exclude>CHANGES.txt</exclude>
-            <exclude>tika-dotnet/AssemblyInfo.cs</exclude>
-            <exclude>tika-dotnet/Tika.csproj</exclude>
-            <exclude>tika-dotnet/Tika.sln</exclude>
-            <exclude>tika-dotnet/Tika.sln.cache</exclude>
-            <exclude>tika-dotnet/obj/**</exclude>
-            <exclude>tika-dotnet/target/**</exclude>
-          </excludes>
-        </configuration>
-      </plugin>
-    </plugins>
-  </build>
-
   <profiles>
     <profile>
       <id>apache-release</id>

Modified: tika/branches/1.6/tika-app/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-app/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-app/pom.xml (original)
+++ tika/branches/1.6/tika-app/pom.xml Sun Aug 31 19:36:36 2014
@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.tika</groupId>
     <artifactId>tika-parent</artifactId>
-    <version>1.6</version>
+    <version>1.7-SNAPSHOT</version>
     <relativePath>../tika-parent/pom.xml</relativePath>
   </parent>
 
@@ -66,8 +66,6 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <scope>test</scope>
-      <version>4.11</version>
     </dependency>
     <dependency>
       <artifactId>commons-io</artifactId>
@@ -230,9 +228,9 @@
        <url>http://www.apache.org</url>
   </organization>
   <scm>
-       <url>http://svn.apache.org/viewvc/tika/tags/1.6/tika-app</url>
-       
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/tika-app</connection>
-       
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/tika-app</developerConnection>
+       <url>http://svn.apache.org/viewvc/tika/trunk/tika-app</url>
+       
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-app</connection>
+       
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-app</developerConnection>
   </scm>
   <issueManagement>
        <system>JIRA</system>

Modified: 
tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java 
(original)
+++ tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java 
Sun Aug 31 19:36:36 2014
@@ -31,13 +31,15 @@ import java.net.ServerSocket;
 import java.net.Socket;
 import java.net.URI;
 import java.net.URL;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Map.Entry;
+import java.util.Locale;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.TransformerConfigurationException;
@@ -656,11 +658,11 @@ public class TikaCLI {
         if (encoding != null) {
             return new OutputStreamWriter(output, encoding);
         } else if (System.getProperty("os.name")
-                .toLowerCase().startsWith("mac os x")) {
+                .toLowerCase(Locale.ROOT).startsWith("mac os x")) {
             // TIKA-324: Override the default encoding on Mac OS X
             return new OutputStreamWriter(output, "UTF-8");
         } else {
-            return new OutputStreamWriter(output);
+            return new OutputStreamWriter(output, Charset.defaultCharset());
         }
     }
 
@@ -759,6 +761,7 @@ public class TikaCLI {
                 // being a CLI program messages should go to the stderr too
                 //
                 String msg = String.format(
+                    Locale.ROOT,
                     "Ignoring unexpected exception trying to save embedded 
file %s (%s)",
                     name,
                     e.getMessage()
@@ -821,13 +824,17 @@ public class TikaCLI {
                 @Override
                 public void run() {
                     try {
+                        InputStream input = null;
                         try {
                             InputStream rawInput = socket.getInputStream();
                             OutputStream output = socket.getOutputStream();
-                            InputStream input = TikaInputStream.get(rawInput);
+                            input = TikaInputStream.get(rawInput);
                             type.process(input, output, new Metadata());
                             output.flush();
                         } finally {
+                            if (input != null) {
+                                input.close();
+                            }
                             socket.close();
                         }
                     } catch (Exception e) {

Modified: 
tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
(original)
+++ 
tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
Sun Aug 31 19:36:36 2014
@@ -20,6 +20,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.PrintStream;
 import java.net.URI;
+import java.util.Locale;
 
 import org.apache.commons.io.FileUtils;
 
@@ -39,14 +40,15 @@ public class TikaCLITest {
     private ByteArrayOutputStream outContent = null;
     private PrintStream stdout = null;
     private URI testDataURI = new 
File("src/test/resources/test-data/").toURI();
-    private String resourcePrefix = testDataURI.toString();
+    private String resourcePrefix;
 
     @Before
     public void setUp() throws Exception {
         profile = new File("welsh.ngp");
         outContent = new ByteArrayOutputStream();
+        resourcePrefix = testDataURI.toString();
         stdout = System.out;
-        System.setOut(new PrintStream(outContent));
+        System.setOut(new PrintStream(outContent, true, "UTF-8"));
     }
 
     /**
@@ -70,7 +72,7 @@ public class TikaCLITest {
     public void testListParserDetail() throws Exception{
         String[] params = {"--list-parser-detail"};
         TikaCLI.main(params);
-        
assertTrue(outContent.toString().contains("application/vnd.oasis.opendocument.text-web"));
+        
assertTrue(outContent.toString("UTF-8").contains("application/vnd.oasis.opendocument.text-web"));
     }
 
     /**
@@ -83,7 +85,7 @@ public class TikaCLITest {
         String[] params = {"--list-parser"};
         TikaCLI.main(params);
         //Assert was commented temporarily for finding the problem
-        //             Assert.assertTrue(outContent != null && 
outContent.toString().contains("org.apache.tika.parser.iwork.IWorkPackageParser"));
+        //             Assert.assertTrue(outContent != null && 
outContent.toString("UTF-8").contains("org.apache.tika.parser.iwork.IWorkPackageParser"));
     }
 
     /**
@@ -95,7 +97,7 @@ public class TikaCLITest {
     public void testXMLOutput() throws Exception{
         String[] params = {"-x", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString().contains("?xml version=\"1.0\" 
encoding=\"UTF-8\"?"));
+        assertTrue(outContent.toString("UTF-8").contains("?xml version=\"1.0\" 
encoding=\"UTF-8\"?"));
     }
 
     /**
@@ -107,9 +109,9 @@ public class TikaCLITest {
     public void testHTMLOutput() throws Exception{
         String[] params = {"-h", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString().contains("html 
xmlns=\"http://www.w3.org/1999/xhtml"));
+        assertTrue(outContent.toString("UTF-8").contains("html 
xmlns=\"http://www.w3.org/1999/xhtml"));
         assertTrue("Expanded <title></title> element should be present",
-                outContent.toString().contains("<title></title>"));
+                outContent.toString("UTF-8").contains("<title></title>"));
     }
 
     /**
@@ -121,7 +123,7 @@ public class TikaCLITest {
     public void testTextOutput() throws Exception{
         String[] params = {"-t", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString().contains("finished off the cake"));
+        assertTrue(outContent.toString("UTF-8").contains("finished off the 
cake"));
     }
 
     /**
@@ -132,7 +134,7 @@ public class TikaCLITest {
     public void testMetadataOutput() throws Exception{
         String[] params = {"-m", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString().contains("text/plain"));
+        assertTrue(outContent.toString("UTF-8").contains("text/plain"));
     }
 
     /**
@@ -144,7 +146,7 @@ public class TikaCLITest {
     public void testJsonMetadataOutput() throws Exception {
         String[] params = {"--json", resourcePrefix + 
"testJsonMultipleInts.html"};
         TikaCLI.main(params);
-        String json = outContent.toString();
+        String json = outContent.toString("UTF-8");
         //TIKA-1310
         assertTrue(json.contains("\"fb:admins\":\"1,2,3,4\","));
         
@@ -165,7 +167,7 @@ public class TikaCLITest {
     public void testLanguageOutput() throws Exception{
         String[] params = {"-l", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString().contains("en"));
+        assertTrue(outContent.toString("UTF-8").contains("en"));
     }
 
     /**
@@ -177,7 +179,7 @@ public class TikaCLITest {
     public void testDetectOutput() throws Exception{
         String[] params = {"-d", resourcePrefix + "alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString().contains("text/plain"));
+        assertTrue(outContent.toString("UTF-8").contains("text/plain"));
     }
 
     /**
@@ -189,7 +191,7 @@ public class TikaCLITest {
     public void testListMetModels() throws Exception{
         String[] params = {"--list-met-models", resourcePrefix + 
"alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString().contains("text/plain"));
+        assertTrue(outContent.toString("UTF-8").contains("text/plain"));
     }
 
     /**
@@ -201,7 +203,7 @@ public class TikaCLITest {
     public void testListSupportedTypes() throws Exception{
         String[] params = {"--list-supported-types", resourcePrefix + 
"alice.cli.test"};
         TikaCLI.main(params);
-        assertTrue(outContent.toString().contains("supertype: 
application/octet-stream"));
+        assertTrue(outContent.toString("UTF-8").contains("supertype: 
application/octet-stream"));
     }
 
     /**
@@ -274,7 +276,7 @@ public class TikaCLITest {
     public void testMultiValuedMetadata() throws Exception {
         String[] params = {"-m", resourcePrefix + 
"testMultipleSheets.numbers"};
         TikaCLI.main(params);
-        String content = outContent.toString();
+        String content = outContent.toString("UTF-8");
         assertTrue(content.contains("sheetNames: Checking"));
         assertTrue(content.contains("sheetNames: Secon sheet"));
         assertTrue(content.contains("sheetNames: Logical Sheet 3"));
@@ -288,7 +290,7 @@ public class TikaCLITest {
         new File("subdir/foo.txt").delete();
         new File("subdir").delete();
         TikaCLI.main(params);
-        String content = outContent.toString();
+        String content = outContent.toString("UTF-8");
         assertTrue(content.contains("Extracting 'subdir/foo.txt'"));
         // clean up. TODO: These should be in target.
         new File("target/subdir/foo.txt").delete();

Modified: tika/branches/1.6/tika-bundle/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-bundle/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-bundle/pom.xml (original)
+++ tika/branches/1.6/tika-bundle/pom.xml Sun Aug 31 19:36:36 2014
@@ -61,8 +61,6 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <scope>test</scope>
-      <version>4.11</version>
     </dependency>
     <dependency>
       <groupId>org.ops4j.pax.exam</groupId>
@@ -253,6 +251,16 @@
           </execution>
         </executions>
       </plugin>
+
+      <!-- The Tika Bundle has no java code of its own, so no need to do -->
+      <!--  any forbidden API checking against it (it gets confused...) -->
+      <plugin>
+        <groupId>de.thetaphi</groupId>
+        <artifactId>forbiddenapis</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 
@@ -309,9 +317,9 @@
        <url>http://www.apache.org</url>
   </organization>
   <scm>
-       <url>http://svn.apache.org/viewvc/tika/tags/1.6/tika-bundle</url>
-       
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/tika-bundle</connection>
-       
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/tika-bundle</developerConnection>
+       <url>http://svn.apache.org/viewvc/tika/trunk/tika-bundle</url>
+       
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-bundle</connection>
+       
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-bundle</developerConnection>
   </scm>
   <issueManagement>
        <system>JIRA</system>

Modified: tika/branches/1.6/tika-core/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-core/pom.xml (original)
+++ tika/branches/1.6/tika-core/pom.xml Sun Aug 31 19:36:36 2014
@@ -60,8 +60,6 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <scope>test</scope>
-      <version>4.11</version>
     </dependency>
   </dependencies>
 
@@ -160,9 +158,9 @@
        <url>http://www.apache.org</url>
   </organization>
   <scm>
-       <url>http://svn.apache.org/viewvc/tika/tags/1.6/core</url>
-       
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/core</connection>
-       
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/core</developerConnection>
+       <url>http://svn.apache.org/viewvc/tika/trunk/core</url>
+       
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/core</connection>
+       
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/core</developerConnection>
   </scm>
   <issueManagement>
        <system>JIRA</system>

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
 Sun Aug 31 19:36:36 2014
@@ -19,9 +19,11 @@ package org.apache.tika.detect;
 import java.io.CharArrayWriter;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
+import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -95,9 +97,13 @@ public class MagicDetector implements De
                 || type.equals("unicodeBE")) {
             decoded = decodeString(value, type);
         } else if (type.equals("stringignorecase")) {
-            decoded = decodeString(value.toLowerCase(), type);
+            decoded = decodeString(value.toLowerCase(Locale.ROOT), type);
         } else if (type.equals("byte")) {
-            decoded = tmpVal.getBytes();
+            try {
+                decoded = tmpVal.getBytes("UTF-8");
+            } catch (UnsupportedEncodingException e) {
+                throw new AssertionError("UTF-8 not supported.");
+            }
         } else if (type.equals("host16") || type.equals("little16")) {
             int i = Integer.parseInt(tmpVal, radix);
             decoded = new byte[] { (byte) (i & 0x00FF), (byte) (i >> 8) };
@@ -393,7 +399,7 @@ public class MagicDetector implements De
                     flags = Pattern.CASE_INSENSITIVE;
                 }
                 
-                Pattern p = Pattern.compile(new String(this.pattern), flags);
+                Pattern p = Pattern.compile(new String(this.pattern, "UTF-8"), flags);
 
                 ByteBuffer bb = ByteBuffer.wrap(buffer);
                 CharBuffer result = ISO_8859_1.decode(bb);

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java 
(original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java 
Sun Aug 31 19:36:36 2014
@@ -17,6 +17,7 @@
 package org.apache.tika.io;
 
 import java.util.HashSet;
+import java.util.Locale;
 
 
 public class FilenameUtils {
@@ -65,7 +66,7 @@ public class FilenameUtils {
 
         for (char c: name.toCharArray()) {
             if (RESERVED.contains(c)) {
-                sb.append('%').append((c<16) ? "0" : "").append(Integer.toHexString(c).toUpperCase());
+                sb.append('%').append((c<16) ? "0" : "").append(Integer.toHexString(c).toUpperCase(Locale.ROOT));
             } else {
                 sb.append(c);
             }

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java 
(original)
+++ tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java 
Sun Aug 31 19:36:36 2014
@@ -28,6 +28,7 @@ import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Reader;
 import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
 import java.io.Writer;
 import java.nio.channels.Channel;
 import java.util.ArrayList;
@@ -254,7 +255,7 @@ public class IOUtils {
      */
     @Deprecated
     public static byte[] toByteArray(String input) throws IOException {
-        return input.getBytes();
+        return input.getBytes("UTF-8");
     }
 
     // read char[]
@@ -392,7 +393,7 @@ public class IOUtils {
      */
     @Deprecated
     public static String toString(byte[] input) throws IOException {
-        return new String(input);
+        return new String(input, "UTF-8");
     }
 
     /**
@@ -412,8 +413,9 @@ public class IOUtils {
     @Deprecated
     public static String toString(byte[] input, String encoding)
             throws IOException {
+        // If no encoding is specified, default to UTF-8.
         if (encoding == null) {
-            return new String(input);
+            return new String(input, "UTF-8");
         } else {
             return new String(input, encoding);
         }
@@ -435,7 +437,7 @@ public class IOUtils {
      * @since Commons IO 1.1
      */
     public static List<String> readLines(InputStream input) throws IOException {
-        InputStreamReader reader = new InputStreamReader(input);
+        InputStreamReader reader = new InputStreamReader(input, "UTF-8");
         return readLines(reader);
     }
 
@@ -529,8 +531,13 @@ public class IOUtils {
      * @since Commons IO 1.1
      */
     public static InputStream toInputStream(String input) {
-        byte[] bytes = input.getBytes();
-        return new ByteArrayInputStream(bytes);
+        try {
+            byte[] bytes = input.getBytes("UTF-8");
+            return new ByteArrayInputStream(bytes);
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
+
     }
 
     /**
@@ -547,7 +554,7 @@ public class IOUtils {
      * @since Commons IO 1.1
      */
     public static InputStream toInputStream(String input, String encoding) throws IOException {
-        byte[] bytes = encoding != null ? input.getBytes(encoding) : input.getBytes();
+        byte[] bytes = encoding != null ? input.getBytes(encoding) : input.getBytes("UTF-8");
         return new ByteArrayInputStream(bytes);
     }
 
@@ -585,7 +592,7 @@ public class IOUtils {
      */
     public static void write(byte[] data, Writer output) throws IOException {
         if (data != null) {
-            output.write(new String(data));
+            output.write(new String(data, "UTF-8"));
         }
     }
 
@@ -653,7 +660,7 @@ public class IOUtils {
     public static void write(char[] data, OutputStream output)
             throws IOException {
         if (data != null) {
-            output.write(new String(data).getBytes());
+            output.write(new String(data).getBytes("UTF-8"));
         }
     }
 
@@ -779,7 +786,7 @@ public class IOUtils {
     public static void write(String data, OutputStream output)
             throws IOException {
         if (data != null) {
-            output.write(data.getBytes());
+            output.write(data.getBytes("UTF-8"));
         }
     }
 
@@ -848,7 +855,7 @@ public class IOUtils {
     public static void write(StringBuffer data, OutputStream output)
             throws IOException {
         if (data != null) {
-            output.write(data.toString().getBytes());
+            output.write(data.toString().getBytes("UTF-8"));
         }
     }
 
@@ -954,7 +961,7 @@ public class IOUtils {
      */
     public static void copy(InputStream input, Writer output)
             throws IOException {
-        InputStreamReader in = new InputStreamReader(input);
+        InputStreamReader in = new InputStreamReader(input, "UTF-8");
         copy(in, output);
     }
 
@@ -1061,7 +1068,7 @@ public class IOUtils {
      */
     public static void copy(Reader input, OutputStream output)
             throws IOException {
-        OutputStreamWriter out = new OutputStreamWriter(output);
+        OutputStreamWriter out = new OutputStreamWriter(output, "UTF-8");
         copy(input, out);
         // XXX Unless anyone is planning on rewriting OutputStreamWriter, we
         // have to flush here.

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
 Sun Aug 31 19:36:36 2014
@@ -405,7 +405,7 @@ public class LanguageProfilerBuilder {
      */
     public void save(OutputStream os) throws IOException {
         os.write(("# NgramProfile generated at " + new Date() + 
-                  " for Apache Tika Language Identification\n").getBytes());
+                  " for Apache Tika Language Identification\n").getBytes("UTF-8"));
 
         // And then each ngram
 

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java 
(original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java 
Sun Aug 31 19:36:36 2014
@@ -60,6 +60,11 @@ public interface XMPDM {
     Property ARTIST = Property.externalText("xmpDM:artist");
 
     /**
+     * "The name of the album artist or group for compilation albums."
+     */
+    Property ALBUM_ARTIST = Property.externalText("xmpDM:albumArtist");
+
+    /**
      * "The date and time when the audio was last modified."
      */
     Property AUDIO_MOD_DATE = Property.internalDate("xmpDM:audioModDate");
@@ -142,6 +147,11 @@ public interface XMPDM {
 //    Property BEAT_SPLICE_PARAMS = "xmpDM:beatSpliceParams";
 
     /**
+     * "An album created by various artists."
+     */
+    Property COMPILATION = Property.externalInteger("xmpDM:compilation");
+
+    /**
      * "The composer's name."
      */
     Property COMPOSER = Property.externalText("xmpDM:composer");
@@ -157,6 +167,11 @@ public interface XMPDM {
     Property COPYRIGHT = Property.externalText("xmpDM:copyright");
 
     /**
+     * "The disc number for part of an album set."
+     */
+    Property DISC_NUMBER = Property.externalInteger("xmpDM:discNumber");
+
+    /**
      * "The duration of the media file."
      */
     Property DURATION = Property.externalReal("xmpDM:duration");

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
 Sun Aug 31 19:36:36 2014
@@ -239,6 +239,7 @@ public class CompositeParser extends Abs
             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
             TaggedContentHandler taggedHandler = 
                 handler != null ? new TaggedContentHandler(handler) : null;
+           metadata.add("X-Parsed-By", parser.getClass().getName());
             try {
                 parser.parse(taggedStream, taggedHandler, metadata, context);
             } catch (RuntimeException e) {

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
 Sun Aug 31 19:36:36 2014
@@ -24,6 +24,7 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.Reader;
+import java.io.UnsupportedEncodingException;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Map;
@@ -231,7 +232,7 @@ public class ExternalParser extends Abst
      */
     private void extractOutput(InputStream stream, XHTMLContentHandler xhtml)
             throws SAXException, IOException {
-        Reader reader = new InputStreamReader(stream);
+        Reader reader = new InputStreamReader(stream, "UTF-8");
         try {
             xhtml.startDocument();
             xhtml.startElement("p");
@@ -291,7 +292,12 @@ public class ExternalParser extends Abst
     private void extractMetadata(final InputStream stream, final Metadata metadata) {
        new Thread() {
           public void run() {
-             BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
+             BufferedReader reader;
+             try {
+                 reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+             } catch (UnsupportedEncodingException e) {
+                 throw new AssertionError("UTF-8 not supported.");
+             }
              try {
                 String line;
                 while ( (line = reader.readLine()) != null ) {
@@ -303,6 +309,7 @@ public class ExternalParser extends Abst
                    }
                 }
              } catch (IOException e) {
+                 // Ignore
              } finally {
                 IOUtils.closeQuietly(reader);
                 IOUtils.closeQuietly(stream);

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
 Sun Aug 31 19:36:36 2014
@@ -22,6 +22,7 @@ import java.io.OutputStreamWriter;
 import java.io.StringWriter;
 import java.io.UnsupportedEncodingException;
 import java.io.Writer;
+import java.nio.charset.Charset;
 
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
@@ -57,7 +58,7 @@ public class ToTextContentHandler extend
      * @param stream output stream
      */
     public ToTextContentHandler(OutputStream stream) {
-        this(new OutputStreamWriter(stream));
+        this(new OutputStreamWriter(stream, Charset.defaultCharset()));
     }
 
     /**

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
 Sun Aug 31 19:36:36 2014
@@ -21,6 +21,7 @@ import java.io.OutputStreamWriter;
 import java.io.Serializable;
 import java.io.StringWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.UUID;
 
 import org.xml.sax.ContentHandler;
@@ -90,7 +91,7 @@ public class WriteOutContentHandler exte
      * @param stream output stream
      */
     public WriteOutContentHandler(OutputStream stream) {
-        this(new OutputStreamWriter(stream));
+        this(new OutputStreamWriter(stream, Charset.defaultCharset()));
     }
 
     /**

Modified: 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java 
(original)
+++ 
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java 
Sun Aug 31 19:36:36 2014
@@ -80,7 +80,7 @@ public class DateUtils {
      */
     public static String formatDateUnknownTimezone(Date date) {
         // Create the Calendar object in the system timezone
-        Calendar calendar = GregorianCalendar.getInstance(Locale.US);
+        Calendar calendar = GregorianCalendar.getInstance(TimeZone.getDefault(), Locale.US);
         calendar.setTime(date);
         // Have it formatted
         String formatted = formatDate(calendar);
@@ -89,6 +89,7 @@ public class DateUtils {
     }
     private static String doFormatDate(Calendar calendar) {
         return String.format(
+                Locale.ROOT,
                 "%04d-%02d-%02dT%02d:%02d:%02dZ",
                 calendar.get(Calendar.YEAR),
                 calendar.get(Calendar.MONTH) + 1,

Modified: 
tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
 (original)
+++ 
tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
 Sun Aug 31 19:36:36 2014
@@ -2395,6 +2395,17 @@
     <glob pattern="*.ace"/>
   </mime-type>
 
+  <mime-type type="application/x-axcrypt">
+    <_comment>AxCrypt</_comment>
+    <glob pattern="*.axx" />
+    <magic priority="60">
+      <!-- AxCrypt block header, skip length field, then Header of type Preamble -->
+      <match value="0xc0b9072e4f93f146a015792ca1d9e821" type="string" offset="0">
+         <match value="2" type="big32" offset="17" />
+      </match>
+    </magic>
+  </mime-type>
+
   <mime-type type="application/x-adobe-indesign">
     <acronym>INDD</acronym>
     <_comment>Adobe InDesign document</_comment>
@@ -3760,8 +3771,6 @@
       <match value="OggS\000.......................\001vorbis" type="string"
              mask="0xFFFFFFFF00000000000000000000000000000000000000000000000000FFFFFFFFFFFF"
              offset="0"/>
-      <match value="\x4f\x67\x67\x53\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00"
-             type="string" offset="0"/>
     </magic>
     <glob pattern="*.ogg"/>
     <sub-class-of type="audio/ogg"/>
@@ -5615,6 +5624,18 @@
     <sub-class-of type="application/ogg"/>
   </mime-type>
 
+  <mime-type type="video/daala">
+    <_comment>Ogg Daala Video</_comment>
+    <alias type="video/x-daala"/>
+    <magic priority="60">
+      <!-- Assumes Video stream comes before Audio, may not always -->
+      <match value="OggS\000.......................\x80daala" type="string"
+             mask="0xFFFFFFFF00000000000000000000000000000000000000000000000000FFFFFFFFFFFF"
+             offset="0"/>
+    </magic>
+    <sub-class-of type="video/ogg"/>
+  </mime-type>
+
   <mime-type type="video/theora">
     <_comment>Ogg Theora Video</_comment>
     <alias type="video/x-theora"/>

Modified: 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
 Sun Aug 31 19:36:36 2014
@@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStream;
+import java.util.Locale;
 
 import org.apache.tika.io.IOUtils;
 
@@ -56,6 +57,7 @@ public class TypeDetectionBenchmark {
                     tika.detect(new ByteArrayInputStream(content));
                 }
                 System.out.printf(
+                        Locale.ROOT,
                         "%6dns per Tika.detect(%s) = %s%n",
                         System.currentTimeMillis() - start, file, type);
             } finally {

Modified: 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
 Sun Aug 31 19:36:36 2014
@@ -19,7 +19,10 @@ package org.apache.tika.io;
 
 
 import org.junit.Test;
-import static org.junit.Assert.*;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 public class FilenameUtilsTest {
 

Modified: 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
 Sun Aug 31 19:36:36 2014
@@ -23,6 +23,7 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
 import java.util.Arrays;
 
 import org.junit.Test;
@@ -68,7 +69,11 @@ public class TailStreamTest
      */
     private static InputStream generateStream(int from, int length)
     {
-        return new ByteArrayInputStream(generateText(from, length).getBytes());
+        try {
+            return new ByteArrayInputStream(generateText(from, length).getBytes("UTF-8"));
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
     }
 
     /**
@@ -123,7 +128,7 @@ public class TailStreamTest
         TailStream stream = new TailStream(generateStream(0, 2 * count), count);
         readStream(stream);
         assertEquals("Wrong buffer", generateText(count, count), new String(
-                stream.getTail()));
+                stream.getTail(), "UTF-8"));
     }
 
     /**
@@ -144,7 +149,7 @@ public class TailStreamTest
             read = stream.read(buf);
         }
         assertEquals("Wrong buffer", generateText(count - tailSize, tailSize),
-                new String(stream.getTail()));
+                new String(stream.getTail(), "UTF-8"));
         stream.close();
     }
 
@@ -164,7 +169,7 @@ public class TailStreamTest
         stream.reset();
         readStream(stream);
         assertEquals("Wrong buffer", generateText(tailSize, tailSize),
-                new String(stream.getTail()));
+                new String(stream.getTail(), "UTF-8"));
     }
 
     /**
@@ -180,7 +185,7 @@ public class TailStreamTest
         byte[] buf = new byte[count];
         stream.read(buf);
         assertEquals("Wrong buffer", generateText(count - tailSize, tailSize),
-                new String(stream.getTail()));
+                new String(stream.getTail(), "UTF-8"));
         stream.close();
     }
 
@@ -197,7 +202,7 @@ public class TailStreamTest
         assertEquals("Wrong skip result", skipCount, stream.skip(skipCount));
         assertEquals("Wrong buffer",
                 generateText(skipCount - tailSize, tailSize),
-                new String(stream.getTail()));
+                new String(stream.getTail(), "UTF-8"));
         stream.close();
     }
 
@@ -211,7 +216,7 @@ public class TailStreamTest
         TailStream stream = new TailStream(generateStream(0, count), 2 * count);
         assertEquals("Wrong skip result", count, stream.skip(2 * count));
         assertEquals("Wrong buffer", generateText(0, count),
-                new String(stream.getTail()));
+                new String(stream.getTail(), "UTF-8"));
         stream.close();
     }
 

Modified: 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
 (original)
+++ 
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
 Sun Aug 31 19:36:36 2014
@@ -45,7 +45,7 @@ public class BodyContentHandlerTest {
         xhtml.element("p", "Test text");
         xhtml.endDocument();
 
-        assertEquals("Test text\n", buffer.toString());
+        assertEquals("Test text\n", buffer.toString("UTF-8"));
     }
 
 }

Modified: tika/branches/1.6/tika-example/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-example/pom.xml?rev=1621623&r1=1621617&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-example/pom.xml (original)
+++ tika/branches/1.6/tika-example/pom.xml Sun Aug 31 19:36:36 2014
@@ -23,7 +23,7 @@
     <parent>
         <artifactId>tika-parent</artifactId>
         <groupId>org.apache.tika</groupId>
-        <version>1.7-SNAPSHOT</version>
+        <version>1.6</version>
         <relativePath>../tika-parent/pom.xml</relativePath>
     </parent>
     <modelVersion>4.0.0</modelVersion>

Modified: tika/branches/1.6/tika-java7/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-java7/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-java7/pom.xml (original)
+++ tika/branches/1.6/tika-java7/pom.xml Sun Aug 31 19:36:36 2014
@@ -35,6 +35,11 @@
   <name>Apache Tika Java-7 Components</name>
   <description>Java-7 reliant components, including FileTypeDetector implementations</description>
 
+  <properties>
+    <maven.compiler.source>1.7</maven.compiler.source>
+    <maven.compiler.target>1.7</maven.compiler.target>
+  </properties>
+
   <build>
     <plugins>
       <plugin>
@@ -56,15 +61,6 @@
           </instructions>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>3.1</version>
-        <configuration>
-          <source>1.7</source>
-          <target>1.7</target>
-        </configuration>
-      </plugin>
     </plugins>
   </build>
 
@@ -87,8 +83,6 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <scope>test</scope>
-      <version>4.11</version>
     </dependency>
   </dependencies>
 
@@ -98,9 +92,9 @@
        <url>http://www.apache.org</url>
   </organization>
   <scm>
-       <url>http://svn.apache.org/viewvc/tika/tags/1.6/tika-java7</url>
-       <connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/tika-java7</connection>
-       <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/tika-java7</developerConnection>
+       <url>http://svn.apache.org/viewvc/tika/trunk/tika-java7</url>
+       <connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-java7</connection>
+       <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-java7</developerConnection>
   </scm>
   <issueManagement>
        <system>JIRA</system>

Modified: tika/branches/1.6/tika-parent/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parent/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parent/pom.xml (original)
+++ tika/branches/1.6/tika-parent/pom.xml Sun Aug 31 19:36:36 2014
@@ -242,15 +242,15 @@
       <dependency>
         <groupId>junit</groupId>
         <artifactId>junit</artifactId>
-        <version>4.10</version>
+        <version>4.11</version>
         <scope>test</scope>
       </dependency>
     </dependencies>
   </dependencyManagement>
 
   <properties>
-    <maven.compile.source>1.6</maven.compile.source>
-    <maven.compile.target>1.6</maven.compile.target>
+    <maven.compiler.source>1.6</maven.compiler.source>
+    <maven.compiler.target>1.6</maven.compiler.target>
     <project.reporting.outputEncoding>${project.build.sourceEncoding}</project.reporting.outputEncoding>
   </properties>
 
@@ -258,11 +258,34 @@
     <plugins>
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.1</version>
         <configuration>
-          <source>${maven.compile.source}</source>
-          <target>${maven.compile.target}</target>
+          <source>${maven.compiler.source}</source>
+          <target>${maven.compiler.target}</target>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>de.thetaphi</groupId>
+        <artifactId>forbiddenapis</artifactId>
+        <version>1.6.1</version>
+        <configuration>
+          <targetVersion>${maven.compiler.target}</targetVersion>
+          <internalRuntimeForbidden>true</internalRuntimeForbidden>
+          <failOnUnsupportedJava>false</failOnUnsupportedJava>
+          <bundledSignatures>
+            <bundledSignature>jdk-unsafe</bundledSignature>
+            <bundledSignature>jdk-deprecated</bundledSignature>
+          </bundledSignatures>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>check</goal>
+              <goal>testCheck</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
     <pluginManagement>
       <plugins>
@@ -324,8 +347,8 @@
   </profiles>
 
   <scm>
-    <connection>scm:svn:http://svn.apache.org/repos/asf/maven/pom/tags/1.6/tika-parent</connection>
-    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/maven/pom/tags/1.6/tika-parent</developerConnection>
-    <url>http://svn.apache.org/viewvc/maven/pom/tags/1.6/tika-parent</url>
+    <connection>scm:svn:http://svn.apache.org/repos/asf/maven/pom/trunk/tika-parent</connection>
+    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/maven/pom/trunk/tika-parent</developerConnection>
+    <url>http://svn.apache.org/viewvc/maven/pom/trunk/tika-parent</url>
   </scm>
 </project>

Modified: tika/branches/1.6/tika-parsers/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/pom.xml (original)
+++ tika/branches/1.6/tika-parsers/pom.xml Sun Aug 31 19:36:36 2014
@@ -44,8 +44,7 @@
     <pdfbox.version>1.8.6</pdfbox.version>
   </properties>
 
-  <dependencies>    
-
+  <dependencies>
     <!-- Optional OSGi dependency, used only when running within OSGi -->
     <dependency>
       <groupId>org.osgi</groupId>
@@ -101,11 +100,12 @@
       <artifactId>commons-compress</artifactId>
       <version>${compress.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.tukaani</groupId>
-      <artifactId>xz</artifactId>
+    <dependency> 
+      <groupId>org.tukaani</groupId> 
+      <artifactId>xz</artifactId> 
       <version>${tukaani.version}</version>
-    </dependency>
+   </dependency>
+
     <dependency>
       <groupId>commons-codec</groupId>
       <artifactId>commons-codec</artifactId>
@@ -215,7 +215,6 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <scope>test</scope>
     </dependency>
     <dependency>
        <groupId>org.mockito</groupId>
@@ -311,9 +310,9 @@
        <url>http://www.apache.org</url>
   </organization>
   <scm>
-       <url>http://svn.apache.org/viewvc/tika/tags/1.6/tika-parsers</url>
-       
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/tika-parsers</connection>
-       
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/tika-parsers</developerConnection>
+       <url>http://svn.apache.org/viewvc/tika/trunk/tika-parsers</url>
+       
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-parsers</connection>
+       
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-parsers</developerConnection>
   </scm>
   <issueManagement>
        <system>JIRA</system>

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
 Sun Aug 31 19:36:36 2014
@@ -212,9 +212,9 @@ public class ChmDirectoryListingSet {
             if (dir_chunk != null) {
 
                 int indexWorkData = ChmCommons.indexOf(dir_chunk,
-                        "::".getBytes());
+                        "::".getBytes("UTF-8"));
                 int indexUserData = ChmCommons.indexOf(dir_chunk,
-                        "/".getBytes());
+                        "/".getBytes("UTF-8"));
 
                 if (indexUserData < indexWorkData)
                     setPlaceHolder(indexUserData);
@@ -238,7 +238,7 @@ public class ChmDirectoryListingSet {
                             // dle.getNameLength()))));
                             dle.setName(new String(ChmCommons.copyOfRange(
                                     dir_chunk, getPlaceHolder(),
-                                    (getPlaceHolder() + 
dle.getNameLength()))));
+                                    (getPlaceHolder() + dle.getNameLength())), 
"UTF-8"));
                             checkControlData(dle);
                             checkResetTable(dle);
                             setPlaceHolder(getPlaceHolder()

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
 Sun Aug 31 19:36:36 2014
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.parser.chm.accessor;
 
+import java.io.UnsupportedEncodingException;
 import java.math.BigInteger;
 
 import org.apache.tika.exception.TikaException;
@@ -42,7 +43,7 @@ import org.apache.tika.parser.chm.except
 /* structure of ITSF headers */
 public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
     private static final long serialVersionUID = 2215291838533213826L;
-    private byte[] signature = new String("ITSF").getBytes(); /* 0 (ITSF) */
+    private byte[] signature;
     private int version; /* 4 */
     private int header_len; /* 8 */
     private int unknown_000c; /* c */
@@ -60,12 +61,24 @@ public class ChmItsfHeader implements Ch
     private int dataRemained;
     private int currentPlace = 0;
 
+    public ChmItsfHeader() {
+        try {
+            signature = ChmConstants.ITSF.getBytes("UTF-8"); /* 0 (ITSF) */
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
+    }
+
     /**
      * Prints the values of ChmfHeader
      */
     public String toString() {
         StringBuilder sb = new StringBuilder();
-        sb.append(new String(getSignature()) + " ");
+        try {
+            sb.append(new String(getSignature(), "UTF-8") + " ");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
         sb.append(getVersion() + " ");
         sb.append(getHeaderLen() + " ");
         sb.append(getUnknown_000c() + " ");
@@ -458,9 +471,12 @@ public class ChmItsfHeader implements Ch
         chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, 
chmItsfHeader.getUnknownLen()));
         chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, 
chmItsfHeader.getDirOffset()));
         chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, 
chmItsfHeader.getDirLen()));
-
-        if (!new 
String(chmItsfHeader.getSignature()).equals(ChmConstants.ITSF))
-            throw new TikaException("seems not valid file");
+        try {
+            if (!new String(chmItsfHeader.getSignature(), 
"UTF-8").equals(ChmConstants.ITSF))
+                throw new TikaException("seems not valid file");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
         if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
             if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
                 throw new TikaException("something wrong with header");

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
 Sun Aug 31 19:36:36 2014
@@ -22,6 +22,8 @@ import org.apache.tika.parser.chm.core.C
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
 
+import java.io.UnsupportedEncodingException;
+
 /**
  * Directory header The directory starts with a header; its format is as
  * follows: 0000: char[4] 'ITSP' 0004: DWORD Version number 1 0008: DWORD 
Length
@@ -45,11 +47,7 @@ import org.apache.tika.parser.chm.except
 public class ChmItspHeader implements ChmAccessor<ChmItspHeader> {
     // TODO: refactor all unmarshals
     private static final long serialVersionUID = 1962394421998181341L;
-    private byte[] signature = new String(ChmConstants.ITSP).getBytes(); /*
-                                                                          * 0
-                                                                          * 
(ITSP
-                                                                          * )
-                                                                          */
+    private byte[] signature;
     private int version; /* 4 */
     private int header_len; /* 8 */
     private int unknown_000c; /* c */
@@ -69,10 +67,26 @@ public class ChmItspHeader implements Ch
     private int dataRemained;
     private int currentPlace = 0;
 
+    public ChmItspHeader() {
+        try {
+            signature = ChmConstants.ITSP.getBytes("UTF-8"); /*
+                                                                          * 0
+                                                                          * 
(ITSP
+                                                                          * )
+                                                                          */
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
+    }
+
     public String toString() {
         StringBuilder sb = new StringBuilder();
-        sb.append("[ signature:=" + new String(getSignature())
-                + System.getProperty("line.separator"));
+        try {
+            sb.append("[ signature:=" + new String(getSignature(), "UTF-8")
+                    + System.getProperty("line.separator"));
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
         sb.append("version:=\t" + getVersion()
                 + System.getProperty("line.separator"));
         sb.append("header_len:=\t" + getHeader_len()
@@ -530,9 +544,12 @@ public class ChmItspHeader implements Ch
                         ChmConstants.BYTE_ARRAY_LENGHT));
 
         /* Checks validity of the itsp header */
-        if (!new 
String(chmItspHeader.getSignature()).equals(ChmConstants.ITSP))
-            throw new ChmParsingException("seems not valid signature");
-
+        try {
+            if (!new String(chmItspHeader.getSignature(), 
"UTF-8").equals(ChmConstants.ITSP))
+                throw new ChmParsingException("seems not valid signature");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
         if (chmItspHeader.getVersion() != ChmConstants.CHM_VER_1)
             throw new ChmParsingException("!=ChmConstants.CHM_VER_1");
 

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
 Sun Aug 31 19:36:36 2014
@@ -21,6 +21,8 @@ import org.apache.tika.parser.chm.assert
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
 
+import java.io.UnsupportedEncodingException;
+
 /**
  * 
  * ::DataSpace/Storage/<SectionName>/ControlData This file contains $20 bytes 
of
@@ -40,11 +42,7 @@ public class ChmLzxcControlData implemen
     private static final long serialVersionUID = -7897854774939631565L;
     /* class' members */
     private long size; /* 0 */
-    private byte[] signature = new String(ChmConstants.LZXC).getBytes(); /*
-                                                                          * 4
-                                                                          * 
(LZXC
-                                                                          * )
-                                                                          */
+    private byte[] signature;
     private long version; /* 8 */
     private long resetInterval; /* c */
     private long windowSize; /* 10 */
@@ -55,6 +53,18 @@ public class ChmLzxcControlData implemen
     private int dataRemained;
     private int currentPlace = 0;
 
+    public ChmLzxcControlData() {
+        try {
+            signature = ChmConstants.LZXC.getBytes("UTF-8"); /*
+                                                              * 4
+                                                              * (LZXC
+                                                              * )
+                                                              */
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
+    }
+
     /**
      * Returns a remained data
      * 
@@ -247,8 +257,12 @@ public class ChmLzxcControlData implemen
     public String toString() {
         StringBuilder sb = new StringBuilder();
         sb.append("size(unknown):=" + this.getSize() + ", ");
-        sb.append("signature(Compression type identifier):="
-                + new String(this.getSignature()) + ", ");
+        try {
+            sb.append("signature(Compression type identifier):="
+                    + new String(this.getSignature(), "UTF-8") + ", ");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
         sb.append("version(Possibly numeric code for LZX):="
                 + this.getVersion() + System.getProperty("line.separator"));
         sb.append("resetInterval(The Huffman reset interval):="
@@ -299,10 +313,14 @@ public class ChmLzxcControlData implemen
                     "window size / resetInterval should be more than 1");
 
         /* checks a signature */
-        if (!new String(chmLzxcControlData.getSignature())
-                .equals(ChmConstants.LZXC))
-            throw new ChmParsingException(
-                    "the signature does not seem to be correct");
+        try {
+            if (!new String(chmLzxcControlData.getSignature(), "UTF-8")
+                    .equals(ChmConstants.LZXC))
+                throw new ChmParsingException(
+                        "the signature does not seem to be correct");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
     }
 
     /**

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
 Sun Aug 31 19:36:36 2014
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.parser.chm.accessor;
 
+import java.io.UnsupportedEncodingException;
 import java.util.Arrays;
 
 import org.apache.tika.exception.TikaException;
@@ -39,21 +40,27 @@ import org.apache.tika.parser.chm.except
  * <p>
  * Note: This class is not in use
  * 
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?show-translation-form=1 }
+ * {@link 
http://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original/?show-translation-form=1
 }
  * 
  * 
  */
 public class ChmPmgiHeader implements ChmAccessor<ChmPmgiHeader> {
     private static final long serialVersionUID = -2092282339894303701L;
-    private byte[] signature = new 
String(ChmConstants.CHM_PMGI_MARKER).getBytes(); /* 0 (PMGI) */
+    private byte[] signature;
     private long free_space; /* 4 */
 
     /* local usage */
     private int dataRemained;
     private int currentPlace = 0;
 
+    public ChmPmgiHeader() {
+        try {
+            signature = ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8"); /* 0 
(PMGI) */
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
+    }
+
     private int getDataRemained() {
         return dataRemained;
     }
@@ -77,8 +84,12 @@ public class ChmPmgiHeader implements Ch
         ChmAssert.assertChmAccessorNotNull(chmPmgiHeader);
         ChmAssert.assertPositiveInt(count);
         this.setDataRemained(data.length);
-        index = ChmCommons.indexOf(data,
-                ChmConstants.CHM_PMGI_MARKER.getBytes());
+        try {
+            index = ChmCommons.indexOf(data,
+                    ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8"));
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
         if (index >= 0)
             System.arraycopy(data, index, chmPmgiHeader.getSignature(), 0, 
count);
         else{
@@ -145,7 +156,11 @@ public class ChmPmgiHeader implements Ch
      */
     public String toString() {
         StringBuilder sb = new StringBuilder();
-        sb.append("signature:=" + new String(getSignature()) + ", ");
+        try {
+            sb.append("signature:=" + new String(getSignature(), "UTF-8") + ", 
");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
         sb.append("free space:=" + getFreeSpace()
                 + System.getProperty("line.separator"));
         return sb.toString();
@@ -162,10 +177,14 @@ public class ChmPmgiHeader implements Ch
         chmPmgiHeader.setFreeSpace(chmPmgiHeader.unmarshalUInt32(data, 
chmPmgiHeader.getFreeSpace()));
 
         /* check structure */
-        if (!Arrays.equals(chmPmgiHeader.getSignature(),
-                ChmConstants.CHM_PMGI_MARKER.getBytes()))
-            throw new TikaException(
-                    "it does not seem to be valid a PMGI signature, check 
ChmItsp index_root if it was -1, means no PMGI, use PMGL insted");
+        try {
+            if (!Arrays.equals(chmPmgiHeader.getSignature(),
+                    ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8")))
+                throw new TikaException(
+                        "it does not seem to be valid a PMGI signature, check 
ChmItsp index_root if it was -1, means no PMGI, use PMGL insted");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
 
     }
 

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
 Sun Aug 31 19:36:36 2014
@@ -21,6 +21,9 @@ import org.apache.tika.parser.chm.assert
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
 
+import java.io.UnsupportedEncodingException;
+import java.util.UnknownFormatConversionException;
+
 /**
  * Description There are two types of directory chunks -- index chunks, and
  * listing chunks. The index chunk will be omitted if there is only one listing
@@ -55,11 +58,7 @@ import org.apache.tika.parser.chm.except
  */
 public class ChmPmglHeader implements ChmAccessor<ChmPmglHeader> {
     private static final long serialVersionUID = -6139486487475923593L;
-    private byte[] signature = new String(ChmConstants.PMGL).getBytes(); /*
-                                                                          * 0
-                                                                          * 
(PMGL
-                                                                          * )
-                                                                          */
+    private byte[] signature;
     private long free_space; /* 4 */
     private long unknown_0008; /* 8 */
     private int block_prev; /* c */
@@ -69,6 +68,18 @@ public class ChmPmglHeader implements Ch
     private int dataRemained;
     private int currentPlace = 0;
 
+    public ChmPmglHeader() {
+        try {
+            signature = ChmConstants.PMGL.getBytes("UTF-8"); /*
+                                                                          * 0
+                                                                          * 
(PMGL
+                                                                          * )
+                                                                          */
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
+    }
+
     private int getDataRemained() {
         return dataRemained;
     }
@@ -95,7 +106,11 @@ public class ChmPmglHeader implements Ch
 
     public String toString() {
         StringBuilder sb = new StringBuilder();
-        sb.append("signatute:=" + new String(getSignature()) + ", ");
+        try {
+            sb.append("signatute:=" + new String(getSignature(), "UTF-8") + ", 
");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
         sb.append("free space:=" + getFreeSpace() + ", ");
         sb.append("unknown0008:=" + getUnknown0008() + ", ");
         sb.append("prev block:=" + getBlockPrev() + ", ");
@@ -160,10 +175,13 @@ public class ChmPmglHeader implements Ch
                 chmPmglHeader.getBlockNext()));
 
         /* check structure */
-        if (!new 
String(chmPmglHeader.getSignature()).equals(ChmConstants.PMGL))
-            throw new ChmParsingException(ChmPmglHeader.class.getName()
-                    + " pmgl != pmgl.signature");
-
+        try {
+            if (!new String(chmPmglHeader.getSignature(), 
"UTF-8").equals(ChmConstants.PMGL))
+                throw new ChmParsingException(ChmPmglHeader.class.getName()
+                        + " pmgl != pmgl.signature");
+        } catch (UnsupportedEncodingException e) {
+            throw new AssertionError("UTF-8 not supported.");
+        }
     }
 
     public byte[] getSignature() {

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
 Sun Aug 31 19:36:36 2014
@@ -174,7 +174,7 @@ public class ChmExtractor {
 
             int indexOfControlData = getChmDirList().getControlDataIndex();
             int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
-                    ChmConstants.LZXC.getBytes());
+                    ChmConstants.LZXC.getBytes("UTF-8"));
             byte[] dir_chunk = null;
             if (indexOfResetData > 0)
                 dir_chunk = ChmCommons.copyOfRange( getData(), 
indexOfResetData, indexOfResetData  

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java
 Sun Aug 31 19:36:36 2014
@@ -20,6 +20,7 @@ import java.io.Writer;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.List;
+import java.util.Locale;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.sax.WriteOutContentHandler;
@@ -83,8 +84,8 @@ public class BoilerpipeContentHandler ex
 
         @Override
         public String toString() {
-            return String.format("<%s> of type %s", localName, elementType);
-        };
+            return String.format(Locale.ROOT, "<%s> of type %s", localName, 
elementType);
+        }
 
         public String getUri() {
             return uri;

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
 Sun Aug 31 19:36:36 2014
@@ -245,7 +245,9 @@ public class ImageMetadataExtractor {
     }
     
     static class ExifHandler implements DirectoryHandler {
-        private static final SimpleDateFormat DATE_UNSPECIFIED_TZ = new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+        // There's a new ExifHandler for each file processed, so this is 
thread safe
+        private final SimpleDateFormat DATE_UNSPECIFIED_TZ = new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
+
         public boolean supports(Class<? extends Directory> directoryType) {
             return directoryType == ExifIFD0Directory.class || 
                     directoryType == ExifSubIFDDirectory.class;


Reply via email to