Author: mattmann
Date: Sun Aug 31 19:36:36 2014
New Revision: 1621623
URL: http://svn.apache.org/r1621623
Log:
Bring 1.6 branch up to date with trunk in prep for 1.6 RC #2.
Added:
tika/branches/1.6/tika-example/
- copied from r1621617, tika/trunk/tika-example/
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testComment.xls
- copied unchanged from r1621617,
tika/trunk/tika-parsers/src/test/resources/test-documents/testComment.xls
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testComment.xlsx
- copied unchanged from r1621617,
tika/trunk/tika-parsers/src/test/resources/test-documents/testComment.xlsx
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testTXT-tika.axx
- copied unchanged from r1621617,
tika/trunk/tika-parsers/src/test/resources/test-documents/testTXT-tika.axx
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaLoggingFilter.java
- copied unchanged from r1621617,
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaLoggingFilter.java
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
- copied unchanged from r1621617,
tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
- copied unchanged from r1621617,
tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
- copied unchanged from r1621617,
tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
tika/branches/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
- copied unchanged from r1621617,
tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
tika/branches/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
- copied unchanged from r1621617,
tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
tika/branches/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java
- copied unchanged from r1621617,
tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java
tika/branches/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/MosesTranslatorTest.java
- copied unchanged from r1621617,
tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/MosesTranslatorTest.java
Removed:
tika/branches/1.6/src/
Modified:
tika/branches/1.6/ (props changed)
tika/branches/1.6/CHANGES.txt
tika/branches/1.6/pom.xml
tika/branches/1.6/tika-app/pom.xml
tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
tika/branches/1.6/tika-bundle/pom.xml
tika/branches/1.6/tika-core/pom.xml
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java
tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
tika/branches/1.6/tika-example/pom.xml
tika/branches/1.6/tika-java7/pom.xml
tika/branches/1.6/tika-parent/pom.xml
tika/branches/1.6/tika-parsers/pom.xml
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TikaTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmBlockInfo.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testMP3id3v2.mp3
tika/branches/1.6/tika-parsers/src/test/resources/test-documents/testMP3id3v24.mp3
tika/branches/1.6/tika-serialization/ (props changed)
tika/branches/1.6/tika-serialization/pom.xml
tika/branches/1.6/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
tika/branches/1.6/tika-server/pom.xml
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaMimeTypes.java
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java
tika/branches/1.6/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java
tika/branches/1.6/tika-server/src/test/java/org/apache/tika/server/TikaWelcomeTest.java
tika/branches/1.6/tika-translate/ (props changed)
tika/branches/1.6/tika-translate/pom.xml
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
tika/branches/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/MicrosoftTranslator.java
tika/branches/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/CachedTranslatorTest.java
tika/branches/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java
tika/branches/1.6/tika-xmp/pom.xml
tika/branches/1.6/tika-xmp/src/test/java/org/apache/tika/xmp/XMPMetadataTest.java
Propchange: tika/branches/1.6/
------------------------------------------------------------------------------
Merged
/tika/trunk:r1613865-1615129,1615131-1615173,1615175-1615623,1615625-1615630,1615632-1619107,1619109-1621617
Modified: tika/branches/1.6/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/CHANGES.txt?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/CHANGES.txt (original)
+++ tika/branches/1.6/CHANGES.txt Sun Aug 31 19:36:36 2014
@@ -1,4 +1,19 @@
-Release 1.6 - 07/27/2014
+Release 1.6 - 08/31/2014
+
+ * Parse output should indicate which Parser was actually used
+ (TIKA-674).
+
+ * Use the forbidden-apis Maven plugin to check for unsafe Java
+ operations (TIKA-1387).
+
+ * Created an ExternalTranslator class to interface with command
+ line Translators (TIKA-1385).
+
+ * Created a MosesTranslator as a subclass of ExternalTranslator
+ that calls the Moses Decoder machine translation program (TIKA-1385).
+
+ * Created the tika-example module. It will have examples of how to
+ use the main Tika interfaces (TIKA-1390).
* Upgraded to Commons Compress 1.8.1 (TIKA-1275).
Modified: tika/branches/1.6/pom.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/pom.xml (original)
+++ tika/branches/1.6/pom.xml Sun Aug 31 19:36:36 2014
@@ -36,12 +36,12 @@
<scm>
<connection>
- scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6
+ scm:svn:http://svn.apache.org/repos/asf/tika/trunk
</connection>
<developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6
+ scm:svn:https://svn.apache.org/repos/asf/tika/trunk
</developerConnection>
- <url>http://svn.apache.org/viewvc/tika/tags/1.6</url>
+ <url>http://svn.apache.org/viewvc/tika/trunk</url>
</scm>
<modules>
@@ -54,42 +54,9 @@
<module>tika-bundle</module>
<module>tika-server</module>
<module>tika-translate</module>
+ <module>tika-example</module>
</modules>
- <build>
- <plugins>
- <plugin>
- <artifactId>maven-deploy-plugin</artifactId>
- <configuration>
- <skip>true</skip> <!-- No need to deploy the reactor -->
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-site-plugin</artifactId>
- <configuration>
- <templateDirectory>src/site</templateDirectory>
- <template>site.vm</template>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.rat</groupId>
- <artifactId>apache-rat-plugin</artifactId>
- <configuration>
- <excludes>
- <exclude>.*/**</exclude>
- <exclude>CHANGES.txt</exclude>
- <exclude>tika-dotnet/AssemblyInfo.cs</exclude>
- <exclude>tika-dotnet/Tika.csproj</exclude>
- <exclude>tika-dotnet/Tika.sln</exclude>
- <exclude>tika-dotnet/Tika.sln.cache</exclude>
- <exclude>tika-dotnet/obj/**</exclude>
- <exclude>tika-dotnet/target/**</exclude>
- </excludes>
- </configuration>
- </plugin>
- </plugins>
- </build>
-
<profiles>
<profile>
<id>apache-release</id>
Modified: tika/branches/1.6/tika-app/pom.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-app/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-app/pom.xml (original)
+++ tika/branches/1.6/tika-app/pom.xml Sun Aug 31 19:36:36 2014
@@ -25,7 +25,7 @@
<parent>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parent</artifactId>
- <version>1.6</version>
+ <version>1.7-SNAPSHOT</version>
<relativePath>../tika-parent/pom.xml</relativePath>
</parent>
@@ -66,8 +66,6 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <scope>test</scope>
- <version>4.11</version>
</dependency>
<dependency>
<artifactId>commons-io</artifactId>
@@ -230,9 +228,9 @@
<url>http://www.apache.org</url>
</organization>
<scm>
- <url>http://svn.apache.org/viewvc/tika/tags/1.6/tika-app</url>
- <connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/tika-app</connection>
- <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/tika-app</developerConnection>
+ <url>http://svn.apache.org/viewvc/tika/trunk/tika-app</url>
+ <connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-app</connection>
+ <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-app</developerConnection>
</scm>
<issueManagement>
<system>JIRA</system>
Modified:
tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/branches/1.6/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Sun Aug 31 19:36:36 2014
@@ -31,13 +31,15 @@ import java.net.ServerSocket;
import java.net.Socket;
import java.net.URI;
import java.net.URL;
+import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
-import java.util.Map.Entry;
+import java.util.Locale;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
@@ -656,11 +658,11 @@ public class TikaCLI {
if (encoding != null) {
return new OutputStreamWriter(output, encoding);
} else if (System.getProperty("os.name")
- .toLowerCase().startsWith("mac os x")) {
+ .toLowerCase(Locale.ROOT).startsWith("mac os x")) {
// TIKA-324: Override the default encoding on Mac OS X
return new OutputStreamWriter(output, "UTF-8");
} else {
- return new OutputStreamWriter(output);
+ return new OutputStreamWriter(output, Charset.defaultCharset());
}
}
@@ -759,6 +761,7 @@ public class TikaCLI {
// being a CLI program messages should go to the stderr too
//
String msg = String.format(
+ Locale.ROOT,
"Ignoring unexpected exception trying to save embedded file %s (%s)",
name,
e.getMessage()
@@ -821,13 +824,17 @@ public class TikaCLI {
@Override
public void run() {
try {
+ InputStream input = null;
try {
InputStream rawInput = socket.getInputStream();
OutputStream output = socket.getOutputStream();
- InputStream input = TikaInputStream.get(rawInput);
+ input = TikaInputStream.get(rawInput);
type.process(input, output, new Metadata());
output.flush();
} finally {
+ if (input != null) {
+ input.close();
+ }
socket.close();
}
} catch (Exception e) {
Modified:
tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java (original)
+++ tika/branches/1.6/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Sun Aug 31 19:36:36 2014
@@ -20,6 +20,7 @@ import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
import java.net.URI;
+import java.util.Locale;
import org.apache.commons.io.FileUtils;
@@ -39,14 +40,15 @@ public class TikaCLITest {
private ByteArrayOutputStream outContent = null;
private PrintStream stdout = null;
private URI testDataURI = new File("src/test/resources/test-data/").toURI();
- private String resourcePrefix = testDataURI.toString();
+ private String resourcePrefix;
@Before
public void setUp() throws Exception {
profile = new File("welsh.ngp");
outContent = new ByteArrayOutputStream();
+ resourcePrefix = testDataURI.toString();
stdout = System.out;
- System.setOut(new PrintStream(outContent));
+ System.setOut(new PrintStream(outContent, true, "UTF-8"));
}
/**
@@ -70,7 +72,7 @@ public class TikaCLITest {
public void testListParserDetail() throws Exception{
String[] params = {"--list-parser-detail"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("application/vnd.oasis.opendocument.text-web"));
+ assertTrue(outContent.toString("UTF-8").contains("application/vnd.oasis.opendocument.text-web"));
}
/**
@@ -83,7 +85,7 @@ public class TikaCLITest {
String[] params = {"--list-parser"};
TikaCLI.main(params);
//Assert was commented temporarily for finding the problem
- // Assert.assertTrue(outContent != null && outContent.toString().contains("org.apache.tika.parser.iwork.IWorkPackageParser"));
+ // Assert.assertTrue(outContent != null && outContent.toString("UTF-8").contains("org.apache.tika.parser.iwork.IWorkPackageParser"));
}
/**
@@ -95,7 +97,7 @@ public class TikaCLITest {
public void testXMLOutput() throws Exception{
String[] params = {"-x", resourcePrefix + "alice.cli.test"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("?xml version=\"1.0\" encoding=\"UTF-8\"?"));
+ assertTrue(outContent.toString("UTF-8").contains("?xml version=\"1.0\" encoding=\"UTF-8\"?"));
}
/**
@@ -107,9 +109,9 @@ public class TikaCLITest {
public void testHTMLOutput() throws Exception{
String[] params = {"-h", resourcePrefix + "alice.cli.test"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("html xmlns=\"http://www.w3.org/1999/xhtml"));
+ assertTrue(outContent.toString("UTF-8").contains("html xmlns=\"http://www.w3.org/1999/xhtml"));
assertTrue("Expanded <title></title> element should be present",
- outContent.toString().contains("<title></title>"));
+ outContent.toString("UTF-8").contains("<title></title>"));
}
/**
@@ -121,7 +123,7 @@ public class TikaCLITest {
public void testTextOutput() throws Exception{
String[] params = {"-t", resourcePrefix + "alice.cli.test"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("finished off the cake"));
+ assertTrue(outContent.toString("UTF-8").contains("finished off the cake"));
}
/**
@@ -132,7 +134,7 @@ public class TikaCLITest {
public void testMetadataOutput() throws Exception{
String[] params = {"-m", resourcePrefix + "alice.cli.test"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("text/plain"));
+ assertTrue(outContent.toString("UTF-8").contains("text/plain"));
}
/**
@@ -144,7 +146,7 @@ public class TikaCLITest {
public void testJsonMetadataOutput() throws Exception {
String[] params = {"--json", resourcePrefix + "testJsonMultipleInts.html"};
TikaCLI.main(params);
- String json = outContent.toString();
+ String json = outContent.toString("UTF-8");
//TIKA-1310
assertTrue(json.contains("\"fb:admins\":\"1,2,3,4\","));
@@ -165,7 +167,7 @@ public class TikaCLITest {
public void testLanguageOutput() throws Exception{
String[] params = {"-l", resourcePrefix + "alice.cli.test"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("en"));
+ assertTrue(outContent.toString("UTF-8").contains("en"));
}
/**
@@ -177,7 +179,7 @@ public class TikaCLITest {
public void testDetectOutput() throws Exception{
String[] params = {"-d", resourcePrefix + "alice.cli.test"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("text/plain"));
+ assertTrue(outContent.toString("UTF-8").contains("text/plain"));
}
/**
@@ -189,7 +191,7 @@ public class TikaCLITest {
public void testListMetModels() throws Exception{
String[] params = {"--list-met-models", resourcePrefix + "alice.cli.test"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("text/plain"));
+ assertTrue(outContent.toString("UTF-8").contains("text/plain"));
}
/**
@@ -201,7 +203,7 @@ public class TikaCLITest {
public void testListSupportedTypes() throws Exception{
String[] params = {"--list-supported-types", resourcePrefix + "alice.cli.test"};
TikaCLI.main(params);
- assertTrue(outContent.toString().contains("supertype: application/octet-stream"));
+ assertTrue(outContent.toString("UTF-8").contains("supertype: application/octet-stream"));
}
/**
@@ -274,7 +276,7 @@ public class TikaCLITest {
public void testMultiValuedMetadata() throws Exception {
String[] params = {"-m", resourcePrefix + "testMultipleSheets.numbers"};
TikaCLI.main(params);
- String content = outContent.toString();
+ String content = outContent.toString("UTF-8");
assertTrue(content.contains("sheetNames: Checking"));
assertTrue(content.contains("sheetNames: Secon sheet"));
assertTrue(content.contains("sheetNames: Logical Sheet 3"));
@@ -288,7 +290,7 @@ public class TikaCLITest {
new File("subdir/foo.txt").delete();
new File("subdir").delete();
TikaCLI.main(params);
- String content = outContent.toString();
+ String content = outContent.toString("UTF-8");
assertTrue(content.contains("Extracting 'subdir/foo.txt'"));
// clean up. TODO: These should be in target.
new File("target/subdir/foo.txt").delete();
Modified: tika/branches/1.6/tika-bundle/pom.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-bundle/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-bundle/pom.xml (original)
+++ tika/branches/1.6/tika-bundle/pom.xml Sun Aug 31 19:36:36 2014
@@ -61,8 +61,6 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <scope>test</scope>
- <version>4.11</version>
</dependency>
<dependency>
<groupId>org.ops4j.pax.exam</groupId>
@@ -253,6 +251,16 @@
</execution>
</executions>
</plugin>
+
+ <!-- The Tika Bundle has no java code of its own, so no need to do -->
+ <!-- any forbidden API checking against it (it gets confused...) -->
+ <plugin>
+ <groupId>de.thetaphi</groupId>
+ <artifactId>forbiddenapis</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
</plugins>
</build>
@@ -309,9 +317,9 @@
<url>http://www.apache.org</url>
</organization>
<scm>
- <url>http://svn.apache.org/viewvc/tika/tags/1.6/tika-bundle</url>
- <connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/tika-bundle</connection>
- <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/tika-bundle</developerConnection>
+ <url>http://svn.apache.org/viewvc/tika/trunk/tika-bundle</url>
+ <connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-bundle</connection>
+ <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-bundle</developerConnection>
</scm>
<issueManagement>
<system>JIRA</system>
Modified: tika/branches/1.6/tika-core/pom.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-core/pom.xml (original)
+++ tika/branches/1.6/tika-core/pom.xml Sun Aug 31 19:36:36 2014
@@ -60,8 +60,6 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <scope>test</scope>
- <version>4.11</version>
</dependency>
</dependencies>
@@ -160,9 +158,9 @@
<url>http://www.apache.org</url>
</organization>
<scm>
- <url>http://svn.apache.org/viewvc/tika/tags/1.6/core</url>
- <connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/core</connection>
- <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/core</developerConnection>
+ <url>http://svn.apache.org/viewvc/tika/trunk/core</url>
+ <connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/core</connection>
+ <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/core</developerConnection>
</scm>
<issueManagement>
<system>JIRA</system>
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java (original)
+++ tika/branches/1.6/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java Sun Aug 31 19:36:36 2014
@@ -19,9 +19,11 @@ package org.apache.tika.detect;
import java.io.CharArrayWriter;
import java.io.IOException;
import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -95,9 +97,13 @@ public class MagicDetector implements De
|| type.equals("unicodeBE")) {
decoded = decodeString(value, type);
} else if (type.equals("stringignorecase")) {
- decoded = decodeString(value.toLowerCase(), type);
+ decoded = decodeString(value.toLowerCase(Locale.ROOT), type);
} else if (type.equals("byte")) {
- decoded = tmpVal.getBytes();
+ try {
+ decoded = tmpVal.getBytes("UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
} else if (type.equals("host16") || type.equals("little16")) {
int i = Integer.parseInt(tmpVal, radix);
decoded = new byte[] { (byte) (i & 0x00FF), (byte) (i >> 8) };
@@ -393,7 +399,7 @@ public class MagicDetector implements De
flags = Pattern.CASE_INSENSITIVE;
}
- Pattern p = Pattern.compile(new String(this.pattern), flags);
+ Pattern p = Pattern.compile(new String(this.pattern, "UTF-8"), flags);
ByteBuffer bb = ByteBuffer.wrap(buffer);
CharBuffer result = ISO_8859_1.decode(bb);
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java (original)
+++ tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java Sun Aug 31 19:36:36 2014
@@ -17,6 +17,7 @@
package org.apache.tika.io;
import java.util.HashSet;
+import java.util.Locale;
public class FilenameUtils {
@@ -65,7 +66,7 @@ public class FilenameUtils {
for (char c: name.toCharArray()) {
if (RESERVED.contains(c)) {
- sb.append('%').append((c<16) ? "0" : "").append(Integer.toHexString(c).toUpperCase());
+ sb.append('%').append((c<16) ? "0" : "").append(Integer.toHexString(c).toUpperCase(Locale.ROOT));
} else {
sb.append(c);
}
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java (original)
+++ tika/branches/1.6/tika-core/src/main/java/org/apache/tika/io/IOUtils.java Sun Aug 31 19:36:36 2014
@@ -28,6 +28,7 @@ import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.channels.Channel;
import java.util.ArrayList;
@@ -254,7 +255,7 @@ public class IOUtils {
*/
@Deprecated
public static byte[] toByteArray(String input) throws IOException {
- return input.getBytes();
+ return input.getBytes("UTF-8");
}
// read char[]
@@ -392,7 +393,7 @@ public class IOUtils {
*/
@Deprecated
public static String toString(byte[] input) throws IOException {
- return new String(input);
+ return new String(input, "UTF-8");
}
/**
@@ -412,8 +413,9 @@ public class IOUtils {
@Deprecated
public static String toString(byte[] input, String encoding)
throws IOException {
+ // If no encoding is specified, default to UTF-8.
if (encoding == null) {
- return new String(input);
+ return new String(input, "UTF-8");
} else {
return new String(input, encoding);
}
@@ -435,7 +437,7 @@ public class IOUtils {
* @since Commons IO 1.1
*/
public static List<String> readLines(InputStream input) throws IOException
{
- InputStreamReader reader = new InputStreamReader(input);
+ InputStreamReader reader = new InputStreamReader(input, "UTF-8");
return readLines(reader);
}
@@ -529,8 +531,13 @@ public class IOUtils {
* @since Commons IO 1.1
*/
public static InputStream toInputStream(String input) {
- byte[] bytes = input.getBytes();
- return new ByteArrayInputStream(bytes);
+ try {
+ byte[] bytes = input.getBytes("UTF-8");
+ return new ByteArrayInputStream(bytes);
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
+
}
/**
@@ -547,7 +554,7 @@ public class IOUtils {
* @since Commons IO 1.1
*/
public static InputStream toInputStream(String input, String encoding)
throws IOException {
- byte[] bytes = encoding != null ? input.getBytes(encoding) : input.getBytes();
+ byte[] bytes = encoding != null ? input.getBytes(encoding) : input.getBytes("UTF-8");
return new ByteArrayInputStream(bytes);
}
@@ -585,7 +592,7 @@ public class IOUtils {
*/
public static void write(byte[] data, Writer output) throws IOException {
if (data != null) {
- output.write(new String(data));
+ output.write(new String(data, "UTF-8"));
}
}
@@ -653,7 +660,7 @@ public class IOUtils {
public static void write(char[] data, OutputStream output)
throws IOException {
if (data != null) {
- output.write(new String(data).getBytes());
+ output.write(new String(data).getBytes("UTF-8"));
}
}
@@ -779,7 +786,7 @@ public class IOUtils {
public static void write(String data, OutputStream output)
throws IOException {
if (data != null) {
- output.write(data.getBytes());
+ output.write(data.getBytes("UTF-8"));
}
}
@@ -848,7 +855,7 @@ public class IOUtils {
public static void write(StringBuffer data, OutputStream output)
throws IOException {
if (data != null) {
- output.write(data.toString().getBytes());
+ output.write(data.toString().getBytes("UTF-8"));
}
}
@@ -954,7 +961,7 @@ public class IOUtils {
*/
public static void copy(InputStream input, Writer output)
throws IOException {
- InputStreamReader in = new InputStreamReader(input);
+ InputStreamReader in = new InputStreamReader(input, "UTF-8");
copy(in, output);
}
@@ -1061,7 +1068,7 @@ public class IOUtils {
*/
public static void copy(Reader input, OutputStream output)
throws IOException {
- OutputStreamWriter out = new OutputStreamWriter(output);
+ OutputStreamWriter out = new OutputStreamWriter(output, "UTF-8");
copy(input, out);
// XXX Unless anyone is planning on rewriting OutputStreamWriter, we
// have to flush here.
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
(original)
+++
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/language/LanguageProfilerBuilder.java
Sun Aug 31 19:36:36 2014
@@ -405,7 +405,7 @@ public class LanguageProfilerBuilder {
*/
public void save(OutputStream os) throws IOException {
os.write(("# NgramProfile generated at " + new Date() +
- " for Apache Tika Language Identification\n").getBytes());
+ " for Apache Tika Language Identification\n").getBytes("UTF-8"));
// And then each ngram
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java
(original)
+++
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java
Sun Aug 31 19:36:36 2014
@@ -60,6 +60,11 @@ public interface XMPDM {
Property ARTIST = Property.externalText("xmpDM:artist");
/**
+ * "The name of the album artist or group for compilation albums."
+ */
+ Property ALBUM_ARTIST = Property.externalText("xmpDM:albumArtist");
+
+ /**
* "The date and time when the audio was last modified."
*/
Property AUDIO_MOD_DATE = Property.internalDate("xmpDM:audioModDate");
@@ -142,6 +147,11 @@ public interface XMPDM {
// Property BEAT_SPLICE_PARAMS = "xmpDM:beatSpliceParams";
/**
+ * "An album created by various artists."
+ */
+ Property COMPILATION = Property.externalInteger("xmpDM:compilation");
+
+ /**
* "The composer's name."
*/
Property COMPOSER = Property.externalText("xmpDM:composer");
@@ -157,6 +167,11 @@ public interface XMPDM {
Property COPYRIGHT = Property.externalText("xmpDM:copyright");
/**
+ * "The disc number for part of an album set."
+ */
+ Property DISC_NUMBER = Property.externalInteger("xmpDM:discNumber");
+
+ /**
* "The duration of the media file."
*/
Property DURATION = Property.externalReal("xmpDM:duration");
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
(original)
+++
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
Sun Aug 31 19:36:36 2014
@@ -239,6 +239,7 @@ public class CompositeParser extends Abs
TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
TaggedContentHandler taggedHandler =
handler != null ? new TaggedContentHandler(handler) : null;
+ metadata.add("X-Parsed-By", parser.getClass().getName());
try {
parser.parse(taggedStream, taggedHandler, metadata, context);
} catch (RuntimeException e) {
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
(original)
+++
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
Sun Aug 31 19:36:36 2014
@@ -24,6 +24,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
+import java.io.UnsupportedEncodingException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
@@ -231,7 +232,7 @@ public class ExternalParser extends Abst
*/
private void extractOutput(InputStream stream, XHTMLContentHandler xhtml)
throws SAXException, IOException {
- Reader reader = new InputStreamReader(stream);
+ Reader reader = new InputStreamReader(stream, "UTF-8");
try {
xhtml.startDocument();
xhtml.startElement("p");
@@ -291,7 +292,12 @@ public class ExternalParser extends Abst
private void extractMetadata(final InputStream stream, final Metadata
metadata) {
new Thread() {
public void run() {
- BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
+ BufferedReader reader;
+ try {
+ reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
try {
String line;
while ( (line = reader.readLine()) != null ) {
@@ -303,6 +309,7 @@ public class ExternalParser extends Abst
}
}
} catch (IOException e) {
+ // Ignore
} finally {
IOUtils.closeQuietly(reader);
IOUtils.closeQuietly(stream);
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
(original)
+++
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
Sun Aug 31 19:36:36 2014
@@ -22,6 +22,7 @@ import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
+import java.nio.charset.Charset;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
@@ -57,7 +58,7 @@ public class ToTextContentHandler extend
* @param stream output stream
*/
public ToTextContentHandler(OutputStream stream) {
- this(new OutputStreamWriter(stream));
+ this(new OutputStreamWriter(stream, Charset.defaultCharset()));
}
/**
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
(original)
+++
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
Sun Aug 31 19:36:36 2014
@@ -21,6 +21,7 @@ import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.io.StringWriter;
import java.io.Writer;
+import java.nio.charset.Charset;
import java.util.UUID;
import org.xml.sax.ContentHandler;
@@ -90,7 +91,7 @@ public class WriteOutContentHandler exte
* @param stream output stream
*/
public WriteOutContentHandler(OutputStream stream) {
- this(new OutputStreamWriter(stream));
+ this(new OutputStreamWriter(stream, Charset.defaultCharset()));
}
/**
Modified:
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java
(original)
+++
tika/branches/1.6/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java
Sun Aug 31 19:36:36 2014
@@ -80,7 +80,7 @@ public class DateUtils {
*/
public static String formatDateUnknownTimezone(Date date) {
// Create the Calendar object in the system timezone
- Calendar calendar = GregorianCalendar.getInstance(Locale.US);
+ Calendar calendar = GregorianCalendar.getInstance(TimeZone.getDefault(), Locale.US);
calendar.setTime(date);
// Have it formatted
String formatted = formatDate(calendar);
@@ -89,6 +89,7 @@ public class DateUtils {
}
private static String doFormatDate(Calendar calendar) {
return String.format(
+ Locale.ROOT,
"%04d-%02d-%02dT%02d:%02d:%02dZ",
calendar.get(Calendar.YEAR),
calendar.get(Calendar.MONTH) + 1,
Modified:
tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
(original)
+++
tika/branches/1.6/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
Sun Aug 31 19:36:36 2014
@@ -2395,6 +2395,17 @@
<glob pattern="*.ace"/>
</mime-type>
+ <mime-type type="application/x-axcrypt">
+ <_comment>AxCrypt</_comment>
+ <glob pattern="*.axx" />
+ <magic priority="60">
+ <!-- AxCrypt block header, skip length field, then Header of type Preamble -->
+ <match value="0xc0b9072e4f93f146a015792ca1d9e821" type="string" offset="0">
+ <match value="2" type="big32" offset="17" />
+ </match>
+ </magic>
+ </mime-type>
+
<mime-type type="application/x-adobe-indesign">
<acronym>INDD</acronym>
<_comment>Adobe InDesign document</_comment>
@@ -3760,8 +3771,6 @@
<match value="OggS\000.......................\001vorbis" type="string"
mask="0xFFFFFFFF00000000000000000000000000000000000000000000000000FFFFFFFFFFFF"
offset="0"/>
- <match value="\x4f\x67\x67\x53\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00"
- type="string" offset="0"/>
</magic>
<glob pattern="*.ogg"/>
<sub-class-of type="audio/ogg"/>
@@ -5615,6 +5624,18 @@
<sub-class-of type="application/ogg"/>
</mime-type>
+ <mime-type type="video/daala">
+ <_comment>Ogg Daala Video</_comment>
+ <alias type="video/x-daala"/>
+ <magic priority="60">
+ <!-- Assumes Video stream comes before Audio, may not always -->
+ <match value="OggS\000.......................\x80daala" type="string"
+
mask="0xFFFFFFFF00000000000000000000000000000000000000000000000000FFFFFFFFFFFF"
+ offset="0"/>
+ </magic>
+ <sub-class-of type="video/ogg"/>
+ </mime-type>
+
<mime-type type="video/theora">
<_comment>Ogg Theora Video</_comment>
<alias type="video/x-theora"/>
Modified:
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
(original)
+++
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
Sun Aug 31 19:36:36 2014
@@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
+import java.util.Locale;
import org.apache.tika.io.IOUtils;
@@ -56,6 +57,7 @@ public class TypeDetectionBenchmark {
tika.detect(new ByteArrayInputStream(content));
}
System.out.printf(
+ Locale.ROOT,
"%6dns per Tika.detect(%s) = %s%n",
System.currentTimeMillis() - start, file, type);
} finally {
Modified:
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
(original)
+++
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
Sun Aug 31 19:36:36 2014
@@ -19,7 +19,10 @@ package org.apache.tika.io;
import org.junit.Test;
-import static org.junit.Assert.*;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
public class FilenameUtilsTest {
Modified:
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
(original)
+++
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
Sun Aug 31 19:36:36 2014
@@ -23,6 +23,7 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import org.junit.Test;
@@ -68,7 +69,11 @@ public class TailStreamTest
*/
private static InputStream generateStream(int from, int length)
{
- return new ByteArrayInputStream(generateText(from, length).getBytes());
+ try {
+ return new ByteArrayInputStream(generateText(from, length).getBytes("UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
}
/**
@@ -123,7 +128,7 @@ public class TailStreamTest
TailStream stream = new TailStream(generateStream(0, 2 * count),
count);
readStream(stream);
assertEquals("Wrong buffer", generateText(count, count), new String(
- stream.getTail()));
+ stream.getTail(), "UTF-8"));
}
/**
@@ -144,7 +149,7 @@ public class TailStreamTest
read = stream.read(buf);
}
assertEquals("Wrong buffer", generateText(count - tailSize, tailSize),
- new String(stream.getTail()));
+ new String(stream.getTail(), "UTF-8"));
stream.close();
}
@@ -164,7 +169,7 @@ public class TailStreamTest
stream.reset();
readStream(stream);
assertEquals("Wrong buffer", generateText(tailSize, tailSize),
- new String(stream.getTail()));
+ new String(stream.getTail(), "UTF-8"));
}
/**
@@ -180,7 +185,7 @@ public class TailStreamTest
byte[] buf = new byte[count];
stream.read(buf);
assertEquals("Wrong buffer", generateText(count - tailSize, tailSize),
- new String(stream.getTail()));
+ new String(stream.getTail(), "UTF-8"));
stream.close();
}
@@ -197,7 +202,7 @@ public class TailStreamTest
assertEquals("Wrong skip result", skipCount, stream.skip(skipCount));
assertEquals("Wrong buffer",
generateText(skipCount - tailSize, tailSize),
- new String(stream.getTail()));
+ new String(stream.getTail(), "UTF-8"));
stream.close();
}
@@ -211,7 +216,7 @@ public class TailStreamTest
TailStream stream = new TailStream(generateStream(0, count), 2 *
count);
assertEquals("Wrong skip result", count, stream.skip(2 * count));
assertEquals("Wrong buffer", generateText(0, count),
- new String(stream.getTail()));
+ new String(stream.getTail(), "UTF-8"));
stream.close();
}
Modified:
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
(original)
+++
tika/branches/1.6/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
Sun Aug 31 19:36:36 2014
@@ -45,7 +45,7 @@ public class BodyContentHandlerTest {
xhtml.element("p", "Test text");
xhtml.endDocument();
- assertEquals("Test text\n", buffer.toString());
+ assertEquals("Test text\n", buffer.toString("UTF-8"));
}
}
Modified: tika/branches/1.6/tika-example/pom.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-example/pom.xml?rev=1621623&r1=1621617&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-example/pom.xml (original)
+++ tika/branches/1.6/tika-example/pom.xml Sun Aug 31 19:36:36 2014
@@ -23,7 +23,7 @@
<parent>
<artifactId>tika-parent</artifactId>
<groupId>org.apache.tika</groupId>
- <version>1.7-SNAPSHOT</version>
+ <version>1.6</version>
<relativePath>../tika-parent/pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
Modified: tika/branches/1.6/tika-java7/pom.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-java7/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-java7/pom.xml (original)
+++ tika/branches/1.6/tika-java7/pom.xml Sun Aug 31 19:36:36 2014
@@ -35,6 +35,11 @@
<name>Apache Tika Java-7 Components</name>
<description>Java-7 reliant components, including FileTypeDetector
implementations</description>
+ <properties>
+ <maven.compiler.source>1.7</maven.compiler.source>
+ <maven.compiler.target>1.7</maven.compiler.target>
+ </properties>
+
<build>
<plugins>
<plugin>
@@ -56,15 +61,6 @@
</instructions>
</configuration>
</plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>3.1</version>
- <configuration>
- <source>1.7</source>
- <target>1.7</target>
- </configuration>
- </plugin>
</plugins>
</build>
@@ -87,8 +83,6 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <scope>test</scope>
- <version>4.11</version>
</dependency>
</dependencies>
@@ -98,9 +92,9 @@
<url>http://www.apache.org</url>
</organization>
<scm>
- <url>http://svn.apache.org/viewvc/tika/tags/1.6/tika-java7</url>
-
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/tika-java7</connection>
-
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/tika-java7</developerConnection>
+ <url>http://svn.apache.org/viewvc/tika/trunk/tika-java7</url>
+
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-java7</connection>
+
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-java7</developerConnection>
</scm>
<issueManagement>
<system>JIRA</system>
Modified: tika/branches/1.6/tika-parent/pom.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parent/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parent/pom.xml (original)
+++ tika/branches/1.6/tika-parent/pom.xml Sun Aug 31 19:36:36 2014
@@ -242,15 +242,15 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>4.10</version>
+ <version>4.11</version>
<scope>test</scope>
</dependency>
</dependencies>
</dependencyManagement>
<properties>
- <maven.compile.source>1.6</maven.compile.source>
- <maven.compile.target>1.6</maven.compile.target>
+ <maven.compiler.source>1.6</maven.compiler.source>
+ <maven.compiler.target>1.6</maven.compiler.target>
<project.reporting.outputEncoding>${project.build.sourceEncoding}</project.reporting.outputEncoding>
</properties>
@@ -258,11 +258,34 @@
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
+ <version>3.1</version>
<configuration>
- <source>${maven.compile.source}</source>
- <target>${maven.compile.target}</target>
+ <source>${maven.compiler.source}</source>
+ <target>${maven.compiler.target}</target>
</configuration>
</plugin>
+ <plugin>
+ <groupId>de.thetaphi</groupId>
+ <artifactId>forbiddenapis</artifactId>
+ <version>1.6.1</version>
+ <configuration>
+ <targetVersion>${maven.compiler.target}</targetVersion>
+ <internalRuntimeForbidden>true</internalRuntimeForbidden>
+ <failOnUnsupportedJava>false</failOnUnsupportedJava>
+ <bundledSignatures>
+ <bundledSignature>jdk-unsafe</bundledSignature>
+ <bundledSignature>jdk-deprecated</bundledSignature>
+ </bundledSignatures>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>check</goal>
+ <goal>testCheck</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
<pluginManagement>
<plugins>
@@ -324,8 +347,8 @@
</profiles>
<scm>
-
<connection>scm:svn:http://svn.apache.org/repos/asf/maven/pom/tags/1.6/tika-parent</connection>
-
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/maven/pom/tags/1.6/tika-parent</developerConnection>
- <url>http://svn.apache.org/viewvc/maven/pom/tags/1.6/tika-parent</url>
+
<connection>scm:svn:http://svn.apache.org/repos/asf/maven/pom/trunk/tika-parent</connection>
+
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/maven/pom/trunk/tika-parent</developerConnection>
+ <url>http://svn.apache.org/viewvc/maven/pom/trunk/tika-parent</url>
</scm>
</project>
Modified: tika/branches/1.6/tika-parsers/pom.xml
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/pom.xml?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/pom.xml (original)
+++ tika/branches/1.6/tika-parsers/pom.xml Sun Aug 31 19:36:36 2014
@@ -44,8 +44,7 @@
<pdfbox.version>1.8.6</pdfbox.version>
</properties>
- <dependencies>
-
+ <dependencies>
<!-- Optional OSGi dependency, used only when running within OSGi -->
<dependency>
<groupId>org.osgi</groupId>
@@ -101,11 +100,12 @@
<artifactId>commons-compress</artifactId>
<version>${compress.version}</version>
</dependency>
- <dependency>
- <groupId>org.tukaani</groupId>
- <artifactId>xz</artifactId>
+ <dependency>
+ <groupId>org.tukaani</groupId>
+ <artifactId>xz</artifactId>
<version>${tukaani.version}</version>
- </dependency>
+ </dependency>
+
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
@@ -215,7 +215,6 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
@@ -311,9 +310,9 @@
<url>http://www.apache.org</url>
</organization>
<scm>
- <url>http://svn.apache.org/viewvc/tika/tags/1.6/tika-parsers</url>
-
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/tags/1.6/tika-parsers</connection>
-
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/tags/1.6/tika-parsers</developerConnection>
+ <url>http://svn.apache.org/viewvc/tika/trunk/tika-parsers</url>
+
<connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-parsers</connection>
+
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-parsers</developerConnection>
</scm>
<issueManagement>
<system>JIRA</system>
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
(original)
+++
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
Sun Aug 31 19:36:36 2014
@@ -212,9 +212,9 @@ public class ChmDirectoryListingSet {
if (dir_chunk != null) {
int indexWorkData = ChmCommons.indexOf(dir_chunk,
- "::".getBytes());
+ "::".getBytes("UTF-8"));
int indexUserData = ChmCommons.indexOf(dir_chunk,
- "/".getBytes());
+ "/".getBytes("UTF-8"));
if (indexUserData < indexWorkData)
setPlaceHolder(indexUserData);
@@ -238,7 +238,7 @@ public class ChmDirectoryListingSet {
// dle.getNameLength()))));
dle.setName(new String(ChmCommons.copyOfRange(
dir_chunk, getPlaceHolder(),
- (getPlaceHolder() + dle.getNameLength()))));
+ (getPlaceHolder() + dle.getNameLength())), "UTF-8"));
checkControlData(dle);
checkResetTable(dle);
setPlaceHolder(getPlaceHolder()
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
(original)
+++
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
Sun Aug 31 19:36:36 2014
@@ -16,6 +16,7 @@
*/
package org.apache.tika.parser.chm.accessor;
+import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import org.apache.tika.exception.TikaException;
@@ -42,7 +43,7 @@ import org.apache.tika.parser.chm.except
/* structure of ITSF headers */
public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
private static final long serialVersionUID = 2215291838533213826L;
- private byte[] signature = new String("ITSF").getBytes(); /* 0 (ITSF) */
+ private byte[] signature;
private int version; /* 4 */
private int header_len; /* 8 */
private int unknown_000c; /* c */
@@ -60,12 +61,24 @@ public class ChmItsfHeader implements Ch
private int dataRemained;
private int currentPlace = 0;
+ public ChmItsfHeader() {
+ try {
+ signature = ChmConstants.ITSF.getBytes("UTF-8"); /* 0 (ITSF) */
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
+ }
+
/**
* Prints the values of ChmfHeader
*/
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append(new String(getSignature()) + " ");
+ try {
+ sb.append(new String(getSignature(), "UTF-8") + " ");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
sb.append(getVersion() + " ");
sb.append(getHeaderLen() + " ");
sb.append(getUnknown_000c() + " ");
@@ -458,9 +471,12 @@ public class ChmItsfHeader implements Ch
chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data,
chmItsfHeader.getUnknownLen()));
chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data,
chmItsfHeader.getDirOffset()));
chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data,
chmItsfHeader.getDirLen()));
-
- if (!new String(chmItsfHeader.getSignature()).equals(ChmConstants.ITSF))
- throw new TikaException("seems not valid file");
+ try {
+ if (!new String(chmItsfHeader.getSignature(), "UTF-8").equals(ChmConstants.ITSF))
+ throw new TikaException("seems not valid file");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
throw new TikaException("something wrong with header");
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
(original)
+++
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
Sun Aug 31 19:36:36 2014
@@ -22,6 +22,8 @@ import org.apache.tika.parser.chm.core.C
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import java.io.UnsupportedEncodingException;
+
/**
* Directory header The directory starts with a header; its format is as
* follows: 0000: char[4] 'ITSP' 0004: DWORD Version number 1 0008: DWORD
Length
@@ -45,11 +47,7 @@ import org.apache.tika.parser.chm.except
public class ChmItspHeader implements ChmAccessor<ChmItspHeader> {
// TODO: refactor all unmarshals
private static final long serialVersionUID = 1962394421998181341L;
- private byte[] signature = new String(ChmConstants.ITSP).getBytes(); /*
- * 0
- *
(ITSP
- * )
- */
+ private byte[] signature;
private int version; /* 4 */
private int header_len; /* 8 */
private int unknown_000c; /* c */
@@ -69,10 +67,26 @@ public class ChmItspHeader implements Ch
private int dataRemained;
private int currentPlace = 0;
+ public ChmItspHeader() {
+ try {
+ signature = ChmConstants.ITSP.getBytes("UTF-8"); /*
+ * 0
+ *
(ITSP
+ * )
+ */
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
+ }
+
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append("[ signature:=" + new String(getSignature())
- + System.getProperty("line.separator"));
+ try {
+ sb.append("[ signature:=" + new String(getSignature(), "UTF-8")
+ + System.getProperty("line.separator"));
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
sb.append("version:=\t" + getVersion()
+ System.getProperty("line.separator"));
sb.append("header_len:=\t" + getHeader_len()
@@ -530,9 +544,12 @@ public class ChmItspHeader implements Ch
ChmConstants.BYTE_ARRAY_LENGHT));
/* Checks validity of the itsp header */
- if (!new String(chmItspHeader.getSignature()).equals(ChmConstants.ITSP))
- throw new ChmParsingException("seems not valid signature");
-
+ try {
+ if (!new String(chmItspHeader.getSignature(), "UTF-8").equals(ChmConstants.ITSP))
+ throw new ChmParsingException("seems not valid signature");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
if (chmItspHeader.getVersion() != ChmConstants.CHM_VER_1)
throw new ChmParsingException("!=ChmConstants.CHM_VER_1");
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java Sun Aug 31 19:36:36 2014
@@ -21,6 +21,8 @@ import org.apache.tika.parser.chm.assert
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import java.io.UnsupportedEncodingException;
+
/**
*
* ::DataSpace/Storage/<SectionName>/ControlData This file contains $20 bytes of
@@ -40,11 +42,7 @@ public class ChmLzxcControlData implemen
private static final long serialVersionUID = -7897854774939631565L;
/* class' members */
private long size; /* 0 */
- private byte[] signature = new String(ChmConstants.LZXC).getBytes(); /*
- * 4
- * (LZXC
- * )
- */
+ private byte[] signature;
private long version; /* 8 */
private long resetInterval; /* c */
private long windowSize; /* 10 */
@@ -55,6 +53,18 @@ public class ChmLzxcControlData implemen
private int dataRemained;
private int currentPlace = 0;
+ public ChmLzxcControlData() {
+ try {
+ signature = ChmConstants.LZXC.getBytes("UTF-8"); /*
+ * 4
+ * (LZXC
+ * )
+ */
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
+ }
+
/**
* Returns a remained data
*
@@ -247,8 +257,12 @@ public class ChmLzxcControlData implemen
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("size(unknown):=" + this.getSize() + ", ");
- sb.append("signature(Compression type identifier):="
- + new String(this.getSignature()) + ", ");
+ try {
+ sb.append("signature(Compression type identifier):="
+ + new String(this.getSignature(), "UTF-8") + ", ");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
sb.append("version(Possibly numeric code for LZX):="
+ this.getVersion() + System.getProperty("line.separator"));
sb.append("resetInterval(The Huffman reset interval):="
@@ -299,10 +313,14 @@ public class ChmLzxcControlData implemen
"window size / resetInterval should be more than 1");
/* checks a signature */
- if (!new String(chmLzxcControlData.getSignature())
- .equals(ChmConstants.LZXC))
- throw new ChmParsingException(
- "the signature does not seem to be correct");
+ try {
+ if (!new String(chmLzxcControlData.getSignature(), "UTF-8")
+ .equals(ChmConstants.LZXC))
+ throw new ChmParsingException(
+ "the signature does not seem to be correct");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
}
/**
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java Sun Aug 31 19:36:36 2014
@@ -16,6 +16,7 @@
*/
package org.apache.tika.parser.chm.accessor;
+import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import org.apache.tika.exception.TikaException;
@@ -39,21 +40,27 @@ import org.apache.tika.parser.chm.except
* <p>
* Note: This class is not in use
*
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?show-translation-form=1 }
+ * {@link http://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original/?show-translation-form=1 }
*
*
*/
public class ChmPmgiHeader implements ChmAccessor<ChmPmgiHeader> {
private static final long serialVersionUID = -2092282339894303701L;
- private byte[] signature = new String(ChmConstants.CHM_PMGI_MARKER).getBytes(); /* 0 (PMGI) */
+ private byte[] signature;
private long free_space; /* 4 */
/* local usage */
private int dataRemained;
private int currentPlace = 0;
+ public ChmPmgiHeader() {
+ try {
+ signature = ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8"); /* 0 (PMGI) */
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
+ }
+
private int getDataRemained() {
return dataRemained;
}
@@ -77,8 +84,12 @@ public class ChmPmgiHeader implements Ch
ChmAssert.assertChmAccessorNotNull(chmPmgiHeader);
ChmAssert.assertPositiveInt(count);
this.setDataRemained(data.length);
- index = ChmCommons.indexOf(data,
- ChmConstants.CHM_PMGI_MARKER.getBytes());
+ try {
+ index = ChmCommons.indexOf(data,
+ ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
if (index >= 0)
System.arraycopy(data, index, chmPmgiHeader.getSignature(), 0, count);
else{
@@ -145,7 +156,11 @@ public class ChmPmgiHeader implements Ch
*/
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append("signature:=" + new String(getSignature()) + ", ");
+ try {
+ sb.append("signature:=" + new String(getSignature(), "UTF-8") + ", ");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
sb.append("free space:=" + getFreeSpace()
+ System.getProperty("line.separator"));
return sb.toString();
@@ -162,10 +177,14 @@ public class ChmPmgiHeader implements Ch
chmPmgiHeader.setFreeSpace(chmPmgiHeader.unmarshalUInt32(data,
chmPmgiHeader.getFreeSpace()));
/* check structure */
- if (!Arrays.equals(chmPmgiHeader.getSignature(),
- ChmConstants.CHM_PMGI_MARKER.getBytes()))
- throw new TikaException(
- "it does not seem to be valid a PMGI signature, check ChmItsp index_root if it was -1, means no PMGI, use PMGL insted");
+ try {
+ if (!Arrays.equals(chmPmgiHeader.getSignature(),
+ ChmConstants.CHM_PMGI_MARKER.getBytes("UTF-8")))
+ throw new TikaException(
+ "it does not seem to be valid a PMGI signature, check ChmItsp index_root if it was -1, means no PMGI, use PMGL insted");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
}
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
---
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
(original)
+++
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
Sun Aug 31 19:36:36 2014
@@ -21,6 +21,9 @@ import org.apache.tika.parser.chm.assert
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import java.io.UnsupportedEncodingException;
+import java.util.UnknownFormatConversionException;
+
/**
* Description There are two types of directory chunks -- index chunks, and
* listing chunks. The index chunk will be omitted if there is only one listing
@@ -55,11 +58,7 @@ import org.apache.tika.parser.chm.except
*/
public class ChmPmglHeader implements ChmAccessor<ChmPmglHeader> {
private static final long serialVersionUID = -6139486487475923593L;
- private byte[] signature = new String(ChmConstants.PMGL).getBytes(); /*
- * 0
- * (PMGL
- * )
- */
+ private byte[] signature;
private long free_space; /* 4 */
private long unknown_0008; /* 8 */
private int block_prev; /* c */
@@ -69,6 +68,18 @@ public class ChmPmglHeader implements Ch
private int dataRemained;
private int currentPlace = 0;
+ public ChmPmglHeader() {
+ try {
+ signature = ChmConstants.PMGL.getBytes("UTF-8"); /*
+ * 0
+ * (PMGL
+ * )
+ */
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
+ }
+
private int getDataRemained() {
return dataRemained;
}
@@ -95,7 +106,11 @@ public class ChmPmglHeader implements Ch
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append("signatute:=" + new String(getSignature()) + ", ");
+ try {
+ sb.append("signatute:=" + new String(getSignature(), "UTF-8") + ", ");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
sb.append("free space:=" + getFreeSpace() + ", ");
sb.append("unknown0008:=" + getUnknown0008() + ", ");
sb.append("prev block:=" + getBlockPrev() + ", ");
@@ -160,10 +175,13 @@ public class ChmPmglHeader implements Ch
chmPmglHeader.getBlockNext()));
/* check structure */
- if (!new String(chmPmglHeader.getSignature()).equals(ChmConstants.PMGL))
- throw new ChmParsingException(ChmPmglHeader.class.getName()
- + " pmgl != pmgl.signature");
-
+ try {
+ if (!new String(chmPmglHeader.getSignature(), "UTF-8").equals(ChmConstants.PMGL))
+ throw new ChmParsingException(ChmPmglHeader.class.getName()
+ + " pmgl != pmgl.signature");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError("UTF-8 not supported.");
+ }
}
public byte[] getSignature() {
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java Sun Aug 31 19:36:36 2014
@@ -174,7 +174,7 @@ public class ChmExtractor {
int indexOfControlData = getChmDirList().getControlDataIndex();
int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
- ChmConstants.LZXC.getBytes());
+ ChmConstants.LZXC.getBytes("UTF-8"));
byte[] dir_chunk = null;
if (indexOfResetData > 0)
dir_chunk = ChmCommons.copyOfRange( getData(), indexOfResetData, indexOfResetData
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java Sun Aug 31 19:36:36 2014
@@ -20,6 +20,7 @@ import java.io.Writer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
+import java.util.Locale;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.sax.WriteOutContentHandler;
@@ -83,8 +84,8 @@ public class BoilerpipeContentHandler ex
@Override
public String toString() {
- return String.format("<%s> of type %s", localName, elementType);
- };
+ return String.format(Locale.ROOT, "<%s> of type %s", localName, elementType);
+ }
public String getUri() {
return uri;
Modified:
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL:
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java Sun Aug 31 19:36:36 2014
@@ -245,7 +245,9 @@ public class ImageMetadataExtractor {
}
static class ExifHandler implements DirectoryHandler {
- private static final SimpleDateFormat DATE_UNSPECIFIED_TZ = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+ // There's a new ExifHandler for each file processed, so this is thread safe
+ private final SimpleDateFormat DATE_UNSPECIFIED_TZ = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
+
public boolean supports(Class<? extends Directory> directoryType) {
return directoryType == ExifIFD0Directory.class ||
directoryType == ExifSubIFDDirectory.class;