Author: nick Date: Thu Aug 20 09:51:44 2015 New Revision: 1696745 URL: http://svn.apache.org/r1696745 Log: TIKA-1710 patch from Yaniv Kunda - Use Commons IO instead of the Tika Core IO copies, and use the java.nio.charset.StandardCharsets constants instead of IOUtils.UTF_8 and charset-name string literals
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java Thu Aug 20 09:51:44 2015 @@ 
-41,6 +41,8 @@ import org.apache.tika.sax.XHTMLContentH import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + public class MidiParser extends AbstractParser { /** Serial version UID */ @@ -101,7 +103,7 @@ public class MidiParser extends Abstract if (meta.getType() >= 1 && meta.getType() <= 15) { // FIXME: What's the encoding? xhtml.characters( - new String(meta.getData(), "ISO-8859-1")); + new String(meta.getData(), ISO_8859_1)); } } } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java Thu Aug 20 09:51:44 2015 @@ -20,11 +20,12 @@ import java.math.BigInteger; import java.util.ArrayList; import java.util.List; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.parser.chm.core.ChmCommons; import org.apache.tika.parser.chm.core.ChmConstants; import org.apache.tika.parser.chm.exception.ChmParsingException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Holds chm listing entries */ @@ -234,7 +235,7 @@ public class ChmDirectoryListingSet { dle.setNameLength(strlen); dle.setName(new String(ChmCommons.copyOfRange( dir_chunk, placeHolder, - (placeHolder + dle.getNameLength())), IOUtils.UTF_8)); + (placeHolder + dle.getNameLength())), UTF_8)); checkControlData(dle); checkResetTable(dle); @@ -255,9 +256,9 @@ public class ChmDirectoryListingSet { } // int indexWorkData = 
ChmCommons.indexOf(dir_chunk, -// "::".getBytes("UTF-8")); +// "::".getBytes(UTF_8)); // int indexUserData = ChmCommons.indexOf(dir_chunk, -// "/".getBytes("UTF-8")); +// "/".getBytes(UTF_8)); // // if (indexUserData>=0 && indexUserData < indexWorkData) // setPlaceHolder(indexUserData); @@ -285,7 +286,7 @@ public class ChmDirectoryListingSet { // // dle.getNameLength())))); // dle.setName(new String(ChmCommons.copyOfRange( // dir_chunk, placeHolder, -// (placeHolder + dle.getNameLength())), "UTF-8")); +// (placeHolder + dle.getNameLength())), UTF_8)); // checkControlData(dle); // checkResetTable(dle); // setPlaceHolder(placeHolder Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java Thu Aug 20 09:51:44 2015 @@ -19,11 +19,12 @@ package org.apache.tika.parser.chm.acces import java.math.BigInteger; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.parser.chm.assertion.ChmAssert; import org.apache.tika.parser.chm.core.ChmConstants; import org.apache.tika.parser.chm.exception.ChmParsingException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * The Header 0000: char[4] 'ITSF' 0004: DWORD 3 (Version number) 0008: DWORD * Total header length, including header section table and following data. 
000C: @@ -62,7 +63,7 @@ public class ChmItsfHeader implements Ch private int currentPlace = 0; public ChmItsfHeader() { - signature = ChmConstants.ITSF.getBytes(IOUtils.UTF_8); /* 0 (ITSF) */ + signature = ChmConstants.ITSF.getBytes(UTF_8); /* 0 (ITSF) */ } /** @@ -70,7 +71,7 @@ public class ChmItsfHeader implements Ch */ public String toString() { StringBuilder sb = new StringBuilder(); - sb.append(new String(getSignature(), IOUtils.UTF_8) + " "); + sb.append(new String(getSignature(), UTF_8) + " "); sb.append(getVersion() + " "); sb.append(getHeaderLen() + " "); sb.append(getUnknown_000c() + " "); @@ -463,7 +464,7 @@ public class ChmItsfHeader implements Ch chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen())); chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset())); chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen())); - if (!new String(chmItsfHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.ITSF)) + if (!new String(chmItsfHeader.getSignature(), UTF_8).equals(ChmConstants.ITSF)) throw new TikaException("seems not valid file"); if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) { if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN) Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java Thu Aug 20 09:51:44 2015 @@ -16,15 +16,14 @@ */ package org.apache.tika.parser.chm.accessor; -import java.io.UnsupportedEncodingException; - 
import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.parser.chm.assertion.ChmAssert; import org.apache.tika.parser.chm.core.ChmCommons; import org.apache.tika.parser.chm.core.ChmConstants; import org.apache.tika.parser.chm.exception.ChmParsingException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Directory header The directory starts with a header; its format is as * follows: 0000: char[4] 'ITSP' 0004: DWORD Version number 1 0008: DWORD Length @@ -69,16 +68,16 @@ public class ChmItspHeader implements Ch private int currentPlace = 0; public ChmItspHeader() { - signature = ChmConstants.ITSP.getBytes(IOUtils.UTF_8); /* - * 0 - * (ITSP - * ) - */ + signature = ChmConstants.ITSP.getBytes(UTF_8); /* + * 0 + * (ITSP + * ) + */ } public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("[ signature:=" + new String(getSignature(), IOUtils.UTF_8) + sb.append("[ signature:=" + new String(getSignature(), UTF_8) + System.getProperty("line.separator")); sb.append("version:=\t" + getVersion() + System.getProperty("line.separator")); @@ -537,7 +536,7 @@ public class ChmItspHeader implements Ch ChmConstants.BYTE_ARRAY_LENGHT)); /* Checks validity of the itsp header */ - if (!new String(chmItspHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.ITSP)) + if (!new String(chmItspHeader.getSignature(), UTF_8).equals(ChmConstants.ITSP)) throw new ChmParsingException("seems not valid signature"); if (chmItspHeader.getVersion() != ChmConstants.CHM_VER_1) Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java Thu Aug 20 09:51:44 2015 @@ -16,14 +16,13 @@ */ package org.apache.tika.parser.chm.accessor; -import java.io.UnsupportedEncodingException; - import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.parser.chm.assertion.ChmAssert; import org.apache.tika.parser.chm.core.ChmConstants; import org.apache.tika.parser.chm.exception.ChmParsingException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * * ::DataSpace/Storage/<SectionName>/ControlData This file contains $20 bytes of @@ -55,11 +54,11 @@ public class ChmLzxcControlData implemen private int currentPlace = 0; public ChmLzxcControlData() { - signature = ChmConstants.LZXC.getBytes(IOUtils.UTF_8); /* - * 4 - * (LZXC - * ) - */ + signature = ChmConstants.LZXC.getBytes(UTF_8); /* + * 4 + * (LZXC + * ) + */ } /** @@ -255,7 +254,7 @@ public class ChmLzxcControlData implemen StringBuilder sb = new StringBuilder(); sb.append("size(unknown):=" + this.getSize() + ", "); sb.append("signature(Compression type identifier):=" - + new String(this.getSignature(), IOUtils.UTF_8) + ", "); + + new String(this.getSignature(), UTF_8) + ", "); sb.append("version(Possibly numeric code for LZX):=" + this.getVersion() + System.getProperty("line.separator")); sb.append("resetInterval(The Huffman reset interval):=" @@ -306,7 +305,7 @@ public class ChmLzxcControlData implemen "window size / resetInterval should be more than 1"); /* checks a signature */ - if (!new String(chmLzxcControlData.getSignature(), IOUtils.UTF_8) + if (!new String(chmLzxcControlData.getSignature(), UTF_8) .equals(ChmConstants.LZXC)) throw new ChmParsingException( "the signature does not seem to be correct"); Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java Thu Aug 20 09:51:44 2015 @@ -19,12 +19,13 @@ package org.apache.tika.parser.chm.acces import java.util.Arrays; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.parser.chm.assertion.ChmAssert; import org.apache.tika.parser.chm.core.ChmCommons; import org.apache.tika.parser.chm.core.ChmConstants; import org.apache.tika.parser.chm.exception.ChmParsingException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Description Note: not always exists An index chunk has the following format: * 0000: char[4] 'PMGI' 0004: DWORD Length of quickref/free area at end of @@ -54,7 +55,7 @@ public class ChmPmgiHeader implements Ch private int currentPlace = 0; public ChmPmgiHeader() { - signature = ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8); /* 0 (PMGI) */ + signature = ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8); /* 0 (PMGI) */ } private int getDataRemained() { @@ -81,7 +82,7 @@ public class ChmPmgiHeader implements Ch ChmAssert.assertPositiveInt(count); this.setDataRemained(data.length); index = ChmCommons.indexOf(data, - ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8)); + ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8)); if (index >= 0) System.arraycopy(data, index, chmPmgiHeader.getSignature(), 0, count); @@ -149,7 +150,7 @@ public class ChmPmgiHeader implements Ch */ public String toString() { StringBuilder sb = new StringBuilder(); - 
sb.append("signature:=" + new String(getSignature(), IOUtils.UTF_8) + ", "); + sb.append("signature:=" + new String(getSignature(), UTF_8) + ", "); sb.append("free space:=" + getFreeSpace() + System.getProperty("line.separator")); return sb.toString(); @@ -167,7 +168,7 @@ public class ChmPmgiHeader implements Ch /* check structure */ if (!Arrays.equals(chmPmgiHeader.getSignature(), - ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8))) + ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8))) throw new TikaException( "it does not seem to be valid a PMGI signature, check ChmItsp index_root if it was -1, means no PMGI, use PMGL insted"); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java Thu Aug 20 09:51:44 2015 @@ -17,11 +17,12 @@ package org.apache.tika.parser.chm.accessor; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.parser.chm.assertion.ChmAssert; import org.apache.tika.parser.chm.core.ChmConstants; import org.apache.tika.parser.chm.exception.ChmParsingException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Description There are two types of directory chunks -- index chunks, and * listing chunks. 
The index chunk will be omitted if there is only one listing @@ -67,11 +68,11 @@ public class ChmPmglHeader implements Ch private int currentPlace = 0; public ChmPmglHeader() { - signature = ChmConstants.PMGL.getBytes(IOUtils.UTF_8); /* - * 0 - * (PMGL - * ) - */ + signature = ChmConstants.PMGL.getBytes(UTF_8); /* + * 0 + * (PMGL + * ) + */ } private int getDataRemained() { @@ -103,7 +104,7 @@ public class ChmPmglHeader implements Ch public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("signatute:=" + new String(getSignature(), IOUtils.UTF_8) + ", "); + sb.append("signatute:=" + new String(getSignature(), UTF_8) + ", "); sb.append("free space:=" + getFreeSpace() + ", "); sb.append("unknown0008:=" + getUnknown0008() + ", "); sb.append("prev block:=" + getBlockPrev() + ", "); @@ -166,7 +167,7 @@ public class ChmPmglHeader implements Ch chmPmglHeader.setBlockNext(chmPmglHeader.unmarshalInt32(data)); /* check structure */ - if (!new String(chmPmglHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.PMGL)) + if (!new String(chmPmglHeader.getSignature(), UTF_8).equals(ChmConstants.PMGL)) throw new ChmParsingException(ChmPmglHeader.class.getName() + " pmgl != pmgl.signature"); } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java Thu Aug 20 09:51:44 2015 @@ -16,14 +16,14 @@ */ package org.apache.tika.parser.chm.core; -import org.apache.tika.io.IOUtils; +import static java.nio.charset.StandardCharsets.UTF_8; public class ChmConstants { /* Prevents instantiation */ 
private ChmConstants() { } - public static final String DEFAULT_CHARSET = IOUtils.UTF_8.name(); + public static final String DEFAULT_CHARSET = UTF_8.name(); public static final String ITSF = "ITSF"; public static final String ITSP = "ITSP"; public static final String PMGL = "PMGL"; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java Thu Aug 20 09:51:44 2015 @@ -21,8 +21,9 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; + +import org.apache.commons.io.IOUtils; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet; import org.apache.tika.parser.chm.accessor.ChmItsfHeader; import org.apache.tika.parser.chm.accessor.ChmItspHeader; @@ -34,6 +35,8 @@ import org.apache.tika.parser.chm.core.C import org.apache.tika.parser.chm.lzx.ChmBlockInfo; import org.apache.tika.parser.chm.lzx.ChmLzxBlock; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Extracts text from chm file. Enumerates chm entries. 
*/ @@ -172,7 +175,7 @@ public class ChmExtractor { int indexOfControlData = getChmDirList().getControlDataIndex(); int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(), - ChmConstants.LZXC.getBytes(IOUtils.UTF_8)); + ChmConstants.LZXC.getBytes(UTF_8)); byte[] dir_chunk = null; if (indexOfResetData > 0) dir_chunk = ChmCommons.copyOfRange( getData(), indexOfResetData, indexOfResetData Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java Thu Aug 20 09:51:44 2015 @@ -30,10 +30,10 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.config.ServiceLoader; import org.apache.tika.detect.AutoDetectReader; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java Thu Aug 20 09:51:44 
2015 @@ -20,8 +20,8 @@ import java.io.IOException; import java.io.InputStream; import java.util.Set; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java Thu Aug 20 09:51:44 2015 @@ -22,7 +22,7 @@ import java.io.OutputStream; import java.io.Serializable; import java.util.Properties; -import org.apache.tika.io.NullOutputStream; +import static org.apache.commons.io.output.NullOutputStream.NULL_OUTPUT_STREAM; /** * Configuration for {@see CTAKESContentHandler}. 
@@ -51,7 +51,7 @@ public class CTAKESConfig implements Ser private CTAKESSerializer serializerType = CTAKESSerializer.XMI; // OutputStream object used for CAS serialization - private OutputStream stream = NullOutputStream.NULL_OUTPUT_STREAM; + private OutputStream stream = NULL_OUTPUT_STREAM; // Enables CAS serialization private boolean serialize = false; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java Thu Aug 20 09:51:44 2015 @@ -23,13 +23,12 @@ import java.util.Collections; import java.util.HashSet; import java.util.Set; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.dif.DIFContentHandler; import org.apache.tika.sax.EmbeddedContentHandler; import org.apache.tika.sax.OfflineContentHandler; import org.apache.tika.sax.TaggedContentHandler; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java (original) +++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java Thu Aug 20 09:51:44 2015 @@ -23,8 +23,8 @@ import java.util.Collections; import java.util.Set; import java.nio.charset.Charset; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.detect.AutoDetectReader; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java Thu Aug 20 09:51:44 2015 @@ -26,8 +26,8 @@ import javax.xml.parsers.ParserConfigura import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java (original) +++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java Thu Aug 20 09:51:44 2015 @@ -25,8 +25,8 @@ import java.util.Set; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; +import org.apache.commons.io.IOUtils; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; @@ -40,6 +40,8 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Epub parser */ @@ -93,7 +95,7 @@ public class EpubParser extends Abstract ZipEntry entry = zip.getNextEntry(); while (entry != null) { if (entry.getName().equals("mimetype")) { - String type = IOUtils.toString(zip, IOUtils.UTF_8.name()); + String type = IOUtils.toString(zip, UTF_8); metadata.set(Metadata.CONTENT_TYPE, type); } else if (entry.getName().equals("metadata.xml")) { meta.parse(zip, new DefaultHandler(), metadata, context); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java Thu Aug 20 09:51:44 2015 @@ -23,8 +23,8 @@ import java.util.Collections; import java.util.HashSet; import java.util.Set; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import 
org.apache.tika.mime.MediaType; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java Thu Aug 20 09:51:44 2015 @@ -31,7 +31,6 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; @@ -43,6 +42,7 @@ import org.apache.tika.sax.XHTMLContentH import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.tika.parser.external.ExternalParser.INPUT_FILE_TOKEN; //Tika imports @@ -384,7 +384,7 @@ public class GDALParser extends Abstract private String extractOutput(InputStream stream) throws SAXException, IOException { StringBuilder sb = new StringBuilder(); - Reader reader = new InputStreamReader(stream, IOUtils.UTF_8); + Reader reader = new InputStreamReader(stream, UTF_8); try { char[] buffer = new char[1024]; for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) { @@ -399,8 +399,8 @@ public class GDALParser extends Abstract private void processOutput(ContentHandler handler, Metadata metadata, String output) throws SAXException, IOException { XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); - InputStream stream = new ByteArrayInputStream(output.getBytes(IOUtils.UTF_8)); - Reader reader = new InputStreamReader(stream, IOUtils.UTF_8); + InputStream stream 
= new ByteArrayInputStream(output.getBytes(UTF_8)); + Reader reader = new InputStreamReader(stream, UTF_8); try { xhtml.startDocument(); xhtml.startElement("p"); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java Thu Aug 20 09:51:44 2015 @@ -35,6 +35,8 @@ import opennlp.tools.util.Span; import org.apache.commons.io.IOUtils; +import static java.nio.charset.StandardCharsets.UTF_8; + public class NameEntityExtractor { private String nerModelPath = null; ArrayList<String> locationNameEntities; @@ -63,7 +65,7 @@ public class NameEntityExtractor { InputStream modelIn = new FileInputStream(nerModelPath); TokenNameFinderModel model = new TokenNameFinderModel(modelIn); NameFinderME nameFinder = new NameFinderME(model); - String[] in = IOUtils.toString(stream, "UTF-8").split(" "); + String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[] = nameFinder.find(in); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java Thu Aug 20 09:51:44 2015 @@ -24,8 +24,8 @@ import java.io.InputStream; import 
java.util.Collections; import java.util.Set; +import org.apache.commons.io.IOUtils; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java Thu Aug 20 09:51:44 2015 @@ -24,10 +24,10 @@ import java.util.Collections; import java.util.HashSet; import java.util.Set; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.config.ServiceLoader; import org.apache.tika.detect.AutoDetectReader; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java Thu Aug 20 09:51:44 2015 @@ -29,8 +29,8 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import 
org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Property; import org.apache.tika.metadata.TikaCoreProperties; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java Thu Aug 20 09:51:44 2015 @@ -18,7 +18,6 @@ package org.apache.tika.parser.image; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; @@ -38,6 +37,8 @@ import org.apache.tika.sax.XHTMLContentH import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.US_ASCII; + /** * Parser for the Adobe Photoshop PSD File Format. 
* <p/> @@ -193,11 +194,7 @@ public class PSDParser extends AbstractP private String getDataAsString() { // Will be null padded - try { - return new String(data, 0, data.length - 1, "ASCII"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException("Something is very broken in your JVM!"); - } + return new String(data, 0, data.length - 1, US_ASCII); } } } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java Thu Aug 20 09:51:44 2015 @@ -27,15 +27,16 @@ import java.util.List; import org.apache.jempbox.xmp.XMPMetadata; import org.apache.jempbox.xmp.XMPSchemaDublinCore; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.xml.sax.InputSource; +import static java.nio.charset.StandardCharsets.UTF_8; + public class JempboxExtractor { // The XMP spec says it must be unicode, but for most file formats it specifies "must be encoded in UTF-8" - private static final String DEFAULT_XMP_CHARSET = IOUtils.UTF_8.name(); + private static final String DEFAULT_XMP_CHARSET = UTF_8.name(); private XMPPacketScanner scanner = new XMPPacketScanner(); private Metadata metadata; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java Thu Aug 20 09:51:44 2015 @@ -22,7 +22,8 @@ package org.apache.tika.parser.image.xmp import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.UnsupportedEncodingException; + +import static java.nio.charset.StandardCharsets.US_ASCII; /** * This class is a parser for XMP packets. By default, it tries to locate the first XMP packet @@ -41,13 +42,9 @@ public class XMPPacketScanner { private static final byte[] PACKET_TRAILER; static { - try { - PACKET_HEADER = "<?xpacket begin=".getBytes("US-ASCII"); - PACKET_HEADER_END = "?>".getBytes("US-ASCII"); - PACKET_TRAILER = "<?xpacket".getBytes("US-ASCII"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException("Incompatible JVM! 
US-ASCII encoding not supported."); - } + PACKET_HEADER = "<?xpacket begin=".getBytes(US_ASCII); + PACKET_HEADER_END = "?>".getBytes(US_ASCII); + PACKET_TRAILER = "<?xpacket".getBytes(US_ASCII); } private static boolean skipAfter(InputStream in, byte[] match) throws IOException { Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java Thu Aug 20 09:51:44 2015 @@ -28,7 +28,6 @@ import java.util.Set; import java.util.TimeZone; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; @@ -38,6 +37,8 @@ import org.apache.tika.sax.XHTMLContentH import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Parser for IPTC ANPA New Wire Feeds */ @@ -162,7 +163,7 @@ public class IptcAnpaParser implements P } int msgsize = is.read(buf); // read in at least the full data - String message = (new String(buf, IOUtils.UTF_8)).toLowerCase(Locale.ROOT); + String message = (new String(buf, UTF_8)).toLowerCase(Locale.ROOT); // these are not if-then-else, because we want to go from most common // and fall through to least. 
this is imperfect, as these tags could // show up in other agency stories, but i can't find a spec or any Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java Thu Aug 20 09:51:44 2015 @@ -28,10 +28,10 @@ import java.util.Map; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.config.ServiceLoader; import org.apache.tika.detect.AutoDetectReader; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java Thu Aug 20 09:51:44 2015 @@ -30,9 +30,9 @@ import org.apache.commons.compress.archi import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; import 
org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.detect.XmlRootExtractor; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java Thu Aug 20 09:51:44 2015 @@ -24,10 +24,10 @@ import java.sql.SQLException; import java.util.List; import java.util.Set; +import org.apache.commons.io.IOExceptionWithCause; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; -import org.apache.tika.io.IOExceptionWithCause; import org.apache.tika.metadata.Database; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java Thu Aug 20 09:51:44 
2015 @@ -31,12 +31,12 @@ import java.sql.Types; import java.util.LinkedList; import java.util.List; +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.io.IOExceptionWithCause; +import org.apache.commons.io.IOUtils; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.extractor.EmbeddedDocumentExtractor; -import org.apache.tika.io.FilenameUtils; -import org.apache.tika.io.IOExceptionWithCause; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Database; import org.apache.tika.metadata.Metadata; @@ -52,6 +52,8 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * General base class to iterate through rows of a JDBC table */ @@ -185,7 +187,7 @@ class JDBCTableReader { //is there a more efficient way to go from a Reader to an InputStream? 
String s = clob.getSubString(0, readSize); EmbeddedDocumentExtractor ex = AbstractDBParser.getEmbeddedDocumentExtractor(context); - ex.parseEmbedded(new ByteArrayInputStream(s.getBytes("UTF-8")), handler, m, true); + ex.parseEmbedded(new ByteArrayInputStream(s.getBytes(UTF_8)), handler, m, true); } protected void handleBlob(String tableName, String columnName, int rowNum, ResultSet resultSet, int columnIndex, Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java Thu Aug 20 09:51:44 2015 @@ -27,7 +27,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Set; -import org.apache.tika.io.IOExceptionWithCause; +import org.apache.commons.io.IOExceptionWithCause; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java Thu Aug 20 09:51:44 2015 @@ -21,11 +21,11 @@ import java.io.InputStream; import java.util.Collections; import java.util.Set; +import org.apache.commons.io.input.TaggedInputStream; import 
org.apache.james.mime4j.MimeException; import org.apache.james.mime4j.parser.MimeStreamParser; import org.apache.james.mime4j.stream.MimeConfig; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.TaggedInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; @@ -73,7 +73,9 @@ public class RFC822Parser extends Abstra xhtml, metadata, context, config.isStrictParsing()); parser.setContentHandler(mch); parser.setContentDecoding(true); - TaggedInputStream tagged = TaggedInputStream.get(stream); + TaggedInputStream tagged = stream instanceof TaggedInputStream + ? (TaggedInputStream)stream + : new TaggedInputStream(stream); try { parser.parse(tagged); } catch (IOException e) { Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java Thu Aug 20 09:51:44 2015 @@ -24,7 +24,6 @@ import java.util.Set; import java.util.Map; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.AbstractParser; @@ -40,6 +39,8 @@ import com.jmatio.io.MatFileReader; import com.jmatio.types.MLArray; import com.jmatio.types.MLStructure; +import static java.nio.charset.StandardCharsets.UTF_8; + public class MatParser extends AbstractParser { @@ -87,7 +88,7 @@ public class MatParser extends AbstractP } // Get endian indicator from header file - String endianBytes = new String(hdr.getEndianIndicator(), 
IOUtils.UTF_8); // Retrieve endian bytes and convert to string + String endianBytes = new String(hdr.getEndianIndicator(), UTF_8); // Retrieve endian bytes and convert to string String endianCode = String.valueOf(endianBytes.toCharArray()); // Convert bytes to characters to string metadata.set("endian", endianCode); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java Thu Aug 20 09:51:44 2015 @@ -17,6 +17,7 @@ package org.apache.tika.parser.mbox; import static java.lang.String.valueOf; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Collections.singleton; import java.io.ByteArrayInputStream; @@ -32,7 +33,6 @@ import com.pff.PSTMessage; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; @@ -153,7 +153,7 @@ public class OutlookPSTParser extends Ab mailMetadata.set("priority", valueOf(pstMail.getPriority())); mailMetadata.set("flagged", valueOf(pstMail.isFlagged())); - byte[] mailContent = pstMail.getBody().getBytes(IOUtils.UTF_8); + byte[] mailContent = pstMail.getBody().getBytes(UTF_8); embeddedExtractor.parseEmbedded(new ByteArrayInputStream(mailContent), handler, mailMetadata, true); } Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java Thu Aug 20 09:51:44 2015 @@ -40,7 +40,6 @@ import com.healthmarketscience.jackcess. import com.healthmarketscience.jackcess.util.OleBlob; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.OfficeOpenXMLExtended; @@ -51,6 +50,8 @@ import org.apache.tika.sax.BodyContentHa import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Internal class. 
Needs to be instantiated for each parse because of * the lack of thread safety with the dateTimeFormatter @@ -198,7 +199,7 @@ class JackcessExtractor extends Abstract Metadata m = new Metadata(); m.set(Metadata.CONTENT_TYPE, "text/html; charset=UTF-8"); try { - htmlParser.parse(new ByteArrayInputStream(v.getBytes(IOUtils.UTF_8)), + htmlParser.parse(new ByteArrayInputStream(v.getBytes(UTF_8)), h, m, EMPTY_PARSE_CONTEXT); handler.characters(h.toString()); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Thu Aug 20 09:51:44 2015 @@ -25,6 +25,7 @@ import java.util.HashSet; import java.util.Locale; import java.util.Set; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.apache.poi.poifs.crypt.Decryptor; @@ -36,7 +37,6 @@ import org.apache.poi.poifs.filesystem.N import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.tika.exception.EncryptedDocumentException; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java Thu Aug 20 09:51:44 2015 @@ -45,7 +45,6 @@ import org.apache.poi.poifs.filesystem.D import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.util.CodePageUtil; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -61,6 +60,8 @@ import org.apache.tika.sax.EmbeddedConte import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Outlook Message Parser. 
*/ @@ -341,7 +342,7 @@ public class OutlookExtractor extends Ab Charset charset = null; try { charset = detector.detect(new ByteArrayInputStream( - html.getBytes(IOUtils.UTF_8)), EMPTY_METADATA); + html.getBytes(UTF_8)), EMPTY_METADATA); } catch (IOException e) { //swallow } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java Thu Aug 20 09:51:44 2015 @@ -26,6 +26,7 @@ import java.util.HashSet; import java.util.Set; import java.util.regex.Pattern; +import org.apache.commons.io.IOUtils; import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentInputStream; @@ -33,7 +34,6 @@ import org.apache.poi.poifs.filesystem.D import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.tika.detect.Detector; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java 
(original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Thu Aug 20 09:51:44 2015 @@ -47,13 +47,14 @@ import org.apache.poi.poifs.filesystem.D import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; +import static java.nio.charset.StandardCharsets.UTF_8; + public class WordExtractor extends AbstractPOIFSExtractor { private static final char UNICODECHAR_NONBREAKING_HYPHEN = '\u2011'; @@ -298,7 +299,7 @@ public class WordExtractor extends Abstr CharacterRun cr = p.getCharacterRun(j); // FIELD_BEGIN_MARK: - if (cr.text().getBytes(IOUtils.UTF_8)[0] == 0x13) { + if (cr.text().getBytes(UTF_8)[0] == 0x13) { Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset()); // 58 is an embedded document // 56 is a document link Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java Thu Aug 20 09:51:44 2015 @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.Locale; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.poi.POIXMLDocument; import 
org.apache.poi.POIXMLTextExtractor; import org.apache.poi.extractor.ExtractorFactory; @@ -33,7 +34,6 @@ import org.apache.poi.xssf.extractor.XSS import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java Thu Aug 20 09:51:44 2015 @@ -18,7 +18,6 @@ package org.apache.tika.parser.mp3; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; import java.util.Arrays; import java.util.List; @@ -26,6 +25,8 @@ import org.apache.tika.exception.TikaExc import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + /** * This is used to parse ID3 Version 1 Tag information from an MP3 file, * if available. 
@@ -177,10 +178,6 @@ public class ID3v1Handler implements ID3 } // Return the remaining substring - try { - return new String(buffer, start, end - start, "ISO-8859-1"); - } catch (UnsupportedEncodingException e) { - throw new TikaException("ISO-8859-1 encoding is not available", e); - } + return new String(buffer, start, end - start, ISO_8859_1); } } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Thu Aug 20 09:51:44 2015 @@ -24,6 +24,8 @@ import java.util.Iterator; import org.apache.tika.parser.mp3.ID3Tags.ID3Comment; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + /** * A frame of ID3v2 data, which is then passed to a handler to * be turned into useful data. @@ -331,12 +333,7 @@ public class ID3v2Frame implements MP3Fr * offset and length. 
Strings are ISO-8859-1 */ protected static String getString(byte[] data, int offset, int length) { - try { - return new String(data, offset, length, "ISO-8859-1"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException( - "Core encoding ISO-8859-1 encoding is not available", e); - } + return new String(data, offset, length, ISO_8859_1); } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java Thu Aug 20 09:51:44 2015 @@ -20,10 +20,12 @@ import java.io.IOException; import java.io.InputStream; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.US_ASCII; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * This is used to parse Lyrics3 tag information * from an MP3 file, if available. @@ -83,12 +85,12 @@ public class LyricsHandler { // size including the LYRICSBEGIN but excluding the // length+LYRICS200 at the end. 
int length = Integer.parseInt( - new String(tagData, lookat-6, 6, IOUtils.UTF_8) + new String(tagData, lookat-6, 6, UTF_8) ); String lyrics = new String( tagData, lookat-length+5, length-11, - "ASCII" + US_ASCII ); // Tags are a 3 letter code, 5 digit length, then data Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java Thu Aug 20 09:51:44 2015 @@ -40,9 +40,9 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import org.apache.commons.io.IOUtils; import org.apache.commons.logging.LogFactory; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; @@ -60,6 +60,8 @@ import org.apache.tika.sax.XHTMLContentH import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * TesseractOCRParser powered by tesseract-ocr engine. 
To enable this parser, * create a {@link TesseractOCRConfig} object and pass it through a @@ -288,7 +290,7 @@ public class TesseractOCRParser extends */ private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException { - Reader reader = new InputStreamReader(stream, IOUtils.UTF_8); + Reader reader = new InputStreamReader(stream, UTF_8); xhtml.startDocument(); xhtml.startElement("div"); try { @@ -312,7 +314,7 @@ public class TesseractOCRParser extends private void logStream(final String logType, final InputStream stream, final File file) { new Thread() { public void run() { - Reader reader = new InputStreamReader(stream, IOUtils.UTF_8); + Reader reader = new InputStreamReader(stream, UTF_8); StringBuilder out = new StringBuilder(); char[] buffer = new char[1024]; try { Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java Thu Aug 20 09:51:44 2015 @@ -31,8 +31,8 @@ import java.util.Map; import java.util.Set; import java.util.Stack; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java Thu Aug 20 09:51:44 2015 @@ -27,8 +27,8 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import java.util.zip.ZipInputStream; +import org.apache.commons.io.IOUtils; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; @@ -41,6 +41,8 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * OpenOffice parser */ @@ -180,7 +182,7 @@ public class OpenDocumentParser extends if (entry == null) return; if (entry.getName().equals("mimetype")) { - String type = IOUtils.toString(zip, IOUtils.UTF_8.name()); + String type = IOUtils.toString(zip, UTF_8); metadata.set(Metadata.CONTENT_TYPE, type); } else if (entry.getName().equals(META_NAME)) { meta.parse(zip, new DefaultHandler(), metadata, context); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java Thu Aug 20 09:51:44 2015 @@ -29,6 +29,8 @@ import java.util.Locale; 
import java.util.Map; import java.util.TreeMap; +import org.apache.commons.io.IOExceptionWithCause; +import org.apache.commons.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; @@ -63,8 +65,6 @@ import org.apache.pdfbox.util.TextPositi import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; -import org.apache.tika.io.IOExceptionWithCause; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java Thu Aug 20 09:51:44 2015 @@ -24,6 +24,8 @@ import java.io.InputStream; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdfparser.BaseParser; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + /** * In fairly rare cases, a PDF's XMP will contain a string that * has incorrectly been encoded with PDFEncoding: an octal for non-ascii and @@ -77,7 +79,7 @@ class PDFEncodedStringDecoder { */ String decode(String value) { try { - byte[] bytes = new String("(" + value + ")").getBytes("ISO-8859-1"); + byte[] bytes = new String("(" + value + ")").getBytes(ISO_8859_1); InputStream is = new ByteArrayInputStream(bytes); COSStringParser p = new 
COSStringParser(is); String parsed = p.myParseCOSString(); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Thu Aug 20 09:51:44 2015 @@ -25,6 +25,7 @@ import java.util.List; import java.util.Locale; import java.util.Set; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.jempbox.xmp.XMPSchema; import org.apache.jempbox.xmp.XMPSchemaDublinCore; import org.apache.jempbox.xmp.pdfa.XMPSchemaPDFAId; @@ -44,7 +45,6 @@ import org.apache.pdfbox.pdmodel.font.PD import org.apache.tika.exception.EncryptedDocumentException; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.AccessPermissions; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java Thu Aug 20 09:51:44 2015 @@ -32,10 +32,10 @@ import org.apache.commons.compress.compr import org.apache.commons.compress.compressors.gzip.GzipUtils; import 
org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream; import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Thu Aug 20 09:51:44 2015 @@ -38,11 +38,11 @@ import org.apache.commons.compress.archi import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException; import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException.Feature; import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.tika.exception.EncryptedDocumentException; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; -import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu Aug 20 09:51:44 2015 @@ -35,6 +35,7 @@ import org.apache.commons.compress.archi import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorInputStream; import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.io.IOUtils; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageAccess; @@ -43,7 +44,6 @@ import org.apache.poi.openxml4j.opc.Pack import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.tika.detect.Detector; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.IOUtils; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; @@ -51,6 +51,8 @@ import org.apache.tika.mime.MediaType; import org.apache.tika.parser.iwork.IWorkPackageParser; import org.apache.tika.parser.iwork.IWorkPackageParser.IWORKDocumentType; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * A detector that works on Zip documents and other archive and compression * formats to figure out exactly what the file is. 
@@ -188,7 +190,7 @@ public class ZipContainerDetector implem if (mimetype != null) { InputStream stream = zip.getInputStream(mimetype); try { - return MediaType.parse(IOUtils.toString(stream, IOUtils.UTF_8.name())); + return MediaType.parse(IOUtils.toString(stream, UTF_8)); } finally { stream.close(); } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java Thu Aug 20 09:51:44 2015 @@ -34,6 +34,8 @@ import org.apache.tika.sax.XHTMLContentH import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import static java.nio.charset.StandardCharsets.US_ASCII; + /** * A basic text extracting parser for the CADKey PRT (CAD Drawing) * format. It outputs text from note entries. 
@@ -80,7 +82,7 @@ public class PRTParser extends AbstractP byte[] date = new byte[12]; IOUtils.readFully(stream, date); - String dateStr = new String(date, "ASCII"); + String dateStr = new String(date, US_ASCII); if(dateStr.startsWith("19") || dateStr.startsWith("20")) { String formattedDate = dateStr.substring(0, 4) + "-" + dateStr.substring(4,6) + "-" + dateStr.substring(6,8) + "T" + dateStr.substring(8,10) + ":" + Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java?rev=1696745&r1=1696744&r2=1696745&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java Thu Aug 20 09:51:44 2015 @@ -21,12 +21,12 @@ import java.io.IOException; import java.io.InputStream; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.commons.io.FilenameUtils; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; -import org.apache.tika.io.FilenameUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.RTFMetadata;