[ 
https://issues.apache.org/jira/browse/TIKA-2102?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15530449#comment-15530449
 ] 

Tim Allison commented on TIKA-2102:
-----------------------------------

I propose putting this off until 1.15/2.0.  This is a critical part of Tika, 
and I don't want to rush a fix.

> SecureContentHandler incorrectly calculates bytesCount for TikaInputStreams 
> with an open container
> ------------------------------------------------------------------------------------
>
>                 Key: TIKA-2102
>                 URL: https://issues.apache.org/jira/browse/TIKA-2102
>             Project: Tika
>          Issue Type: Bug
>            Reporter: Tim Allison
>            Priority: Minor
>
> On a handful of files in the latest regression run, we're now getting zip 
> bomb exceptions when an embedded document is an "open container" inside a 
> TikaInputStream.  SecureContentHandler calculates a ratio of bytes written to 
> total bytes in the stream; when we're parsing an embedded "open container", 
> the length of the input stream is reported as 0.
> Should we add another if clause to handle parsing of open containers?  
> That would leave us vulnerable to zip bombs in that case.  Is there an 
> obvious way to give a hint about the length?
> {noformat}
> org.apache.tika.exception.TikaException: Zip bomb detected!
>       at 
> org.apache.tika.sax.SecureContentHandler.throwIfCauseOf(SecureContentHandler.java:192)
>       at 
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:123)
>       at 
> org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:188)
>       at org.apache.tika.parser.DigestingParser.parse(DigestingParser.java:74)
>       at 
> org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:188)
>       at 
> org.apache.tika.parser.RecursiveParserWrapper$EmbeddedParserDecorator.parse(RecursiveParserWrapper.java:317)
>       at 
> org.apache.tika.parser.DelegatingParser.parse(DelegatingParser.java:72)
>       at 
> org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor.parseEmbedded(ParsingEmbeddedDocumentExtractor.java:102)
>       at 
> org.apache.tika.parser.microsoft.AbstractPOIFSExtractor.handleEmbeddedResource(AbstractPOIFSExtractor.java:141)
>       at 
> org.apache.tika.parser.microsoft.AbstractPOIFSExtractor.handleEmbeddedResource(AbstractPOIFSExtractor.java:117)
>       at 
> org.apache.tika.parser.microsoft.HSLFExtractor.handleSlideEmbeddedResources(HSLFExtractor.java:393)
>       at 
> org.apache.tika.parser.microsoft.HSLFExtractor.parse(HSLFExtractor.java:142)
>       at 
> org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:172)
>       at 
> org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:130)
>       at 
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>       at 
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>       at 
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
>       at 
> org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:188)
>       at org.apache.tika.parser.DigestingParser.parse(DigestingParser.java:74)
>       at 
> org.apache.tika.parser.RecursiveParserWrapper.parse(RecursiveParserWrapper.java:158)
>       at 
> org.apache.tika.batch.FileResourceConsumer.parse(FileResourceConsumer.java:407)
>       at 
> org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer.processFileResource(RecursiveParserWrapperFSConsumer.java:104)
>       at 
> org.apache.tika.batch.FileResourceConsumer._processFileResource(FileResourceConsumer.java:182)
>       at 
> org.apache.tika.batch.FileResourceConsumer.call(FileResourceConsumer.java:115)
>       at 
> org.apache.tika.batch.FileResourceConsumer.call(FileResourceConsumer.java:50)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.tika.sax.SecureContentHandler$SecureSAXException: 
> Suspected zip bomb: 0 input bytes produced 1000016 output characters
>       at 
> org.apache.tika.sax.SecureContentHandler.advance(SecureContentHandler.java:221)
>       at 
> org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:269)
>       at 
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>       at 
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>       at 
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>       at 
> org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
>       at 
> org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
>       at 
> org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
>       at 
> org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
>       at 
> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
>       at 
> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
>       at org.apache.tika.parser.microsoft.TextCell.render(TextCell.java:34)
>       at 
> org.apache.tika.parser.microsoft.ExcelExtractor$TikaHSSFListener.processSheet(ExcelExtractor.java:585)
>       at 
> org.apache.tika.parser.microsoft.ExcelExtractor$TikaHSSFListener.internalProcessRecord(ExcelExtractor.java:385)
>       at 
> org.apache.tika.parser.microsoft.ExcelExtractor$TikaHSSFListener.processRecord(ExcelExtractor.java:336)
>       at 
> org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener.processRecord(FormatTrackingHSSFListener.java:92)
>       at 
> org.apache.poi.hssf.eventusermodel.HSSFRequest.processRecord(HSSFRequest.java:109)
>       at 
> org.apache.poi.hssf.eventusermodel.HSSFEventFactory.genericProcessEvents(HSSFEventFactory.java:179)
>       at 
> org.apache.poi.hssf.eventusermodel.HSSFEventFactory.processEvents(HSSFEventFactory.java:136)
>       at 
> org.apache.tika.parser.microsoft.ExcelExtractor$TikaHSSFListener.processFile(ExcelExtractor.java:312)
>       at 
> org.apache.tika.parser.microsoft.ExcelExtractor.parse(ExcelExtractor.java:169)
>       at 
> org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:177)
>       at 
> org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:130)
>       at 
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>       at 
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>       at 
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
>       ... 29 more
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to