Bhanuprakash Prathap created SOLR-9022:
------------------------------------------
Summary: solr unable to handle/parse images when they are embedded
in office docs(like word,xls,etc)
Key: SOLR-9022
URL: https://issues.apache.org/jira/browse/SOLR-9022
Project: Solr
Issue Type: Bug
Components: SolrJ
Affects Versions: 5.4.1
Reporter: Bhanuprakash Prathap
When we try to index multiple files, Solr throws the exception below
whenever it encounters images embedded in other documents.
The issue arises because embedded image files are read with a binary MIME
type (_attachment_mimetype=[application/octet-stream]) even though the attached
files are actually of type png, txt, etc.
Full stack trace for this issue:
2016-04-20 16:55:13,311 INFO [Thread-52]
org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: Added
attempt_1460872551948_58233_m_000116_1 to list of failed maps
2016-04-20 16:55:13,329 INFO [IPC Server handler 18 on 39376]
org.apache.hadoop.mapred.TaskAttemptListenerImpl: Progress of TaskAttempt
attempt_1460872551948_58233_m_000159_0 is : 0.0
2016-04-20 16:55:13,342 ERROR [IPC Server handler 20 on 39376]
org.apache.hadoop.mapred.TaskAttemptListenerImpl: Task:
attempt_1460872551948_58233_m_000159_0 - exited :
org.kitesdk.morphline.api.MorphlineRuntimeException:
org.kitesdk.morphline.api.MorphlineRuntimeException: tryRules command found no
successful rule for record: {_attachment_body=[TikaInputStream of
java.io.BufferedInputStream@391f1777],
_attachment_mimetype=[application/octet-stream],
_attachment_name=[xl/media/image2.png],
id=[185be63a-e527-4953-9a3d-ae957dc0fa51]}
at
org.kitesdk.morphline.base.FaultTolerance.handleException(FaultTolerance.java:73)
at
org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:220)
at
org.apache.solr.hadoop.morphline.MorphlineMapper.map(MorphlineMapper.java:86)
at
org.apache.solr.hadoop.morphline.MorphlineMapper.map(MorphlineMapper.java:54)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: org.kitesdk.morphline.api.MorphlineRuntimeException: tryRules
command found no successful rule for record: {_attachment_body=[TikaInputStream
of java.io.BufferedInputStream@391f1777],
_attachment_mimetype=[application/octet-stream],
_attachment_name=[xl/media/image2.png],
id=[185be63a-e527-4953-9a3d-ae957dc0fa51]}
at
org.kitesdk.morphline.stdlib.TryRulesBuilder$TryRules.doProcess(TryRulesBuilder.java:132)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.tika.DetectMimeTypeBuilder$DetectMimeType.doProcess(DetectMimeTypeBuilder.java:166)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.stdlib.SeparateAttachmentsBuilder$SeparateAttachments.doProcess(SeparateAttachmentsBuilder.java:79)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.stdlib.GenerateUUIDBuilder$GenerateUUID.doProcess(GenerateUUIDBuilder.java:98)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.tika.decompress.EmbeddedExtractor.parseEmbedded(EmbeddedExtractor.java:57)
at
org.kitesdk.morphline.tika.decompress.UnpackBuilder$Unpack.parseEntry(UnpackBuilder.java:138)
at
org.kitesdk.morphline.tika.decompress.UnpackBuilder$Unpack.doProcess(UnpackBuilder.java:113)
at
org.kitesdk.morphline.stdio.AbstractParser.doProcess(AbstractParser.java:96)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.stdlib.LogDebugBuilder$LogDebug.doProcess(LogDebugBuilder.java:58)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.kitesdk.morphline.stdlib.TryRulesBuilder$TryRules.doProcess(TryRulesBuilder.java:115)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.tika.DetectMimeTypeBuilder$DetectMimeType.doProcess(DetectMimeTypeBuilder.java:166)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.stdlib.SeparateAttachmentsBuilder$SeparateAttachments.doProcess(SeparateAttachmentsBuilder.java:79)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:208)
... 10 more
2016-04-20 16:55:13,343 INFO [IPC Server handler 20 on 39376]
org.apache.hadoop.mapred.TaskAttemptListenerImpl: Diagnostics report from
attempt_1460872551948_58233_m_000159_0: Error:
org.kitesdk.morphline.api.MorphlineRuntimeException:
org.kitesdk.morphline.api.MorphlineRuntimeException: tryRules command found no
successful rule for record: {_attachment_body=[TikaInputStream of
java.io.BufferedInputStream@391f1777],
_attachment_mimetype=[application/octet-stream],
_attachment_name=[xl/media/image2.png],
id=[185be63a-e527-4953-9a3d-ae957dc0fa51]}
at
org.kitesdk.morphline.base.FaultTolerance.handleException(FaultTolerance.java:73)
at
org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:220)
at
org.apache.solr.hadoop.morphline.MorphlineMapper.map(MorphlineMapper.java:86)
at
org.apache.solr.hadoop.morphline.MorphlineMapper.map(MorphlineMapper.java:54)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: org.kitesdk.morphline.api.MorphlineRuntimeException: tryRules
command found no successful rule for record: {_attachment_body=[TikaInputStream
of java.io.BufferedInputStream@391f1777],
_attachment_mimetype=[application/octet-stream],
_attachment_name=[xl/media/image2.png],
id=[185be63a-e527-4953-9a3d-ae957dc0fa51]}
at
org.kitesdk.morphline.stdlib.TryRulesBuilder$TryRules.doProcess(TryRulesBuilder.java:132)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.tika.DetectMimeTypeBuilder$DetectMimeType.doProcess(DetectMimeTypeBuilder.java:166)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.stdlib.SeparateAttachmentsBuilder$SeparateAttachments.doProcess(SeparateAttachmentsBuilder.java:79)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.stdlib.GenerateUUIDBuilder$GenerateUUID.doProcess(GenerateUUIDBuilder.java:98)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.tika.decompress.EmbeddedExtractor.parseEmbedded(EmbeddedExtractor.java:57)
at
org.kitesdk.morphline.tika.decompress.UnpackBuilder$Unpack.parseEntry(UnpackBuilder.java:138)
at
org.kitesdk.morphline.tika.decompress.UnpackBuilder$Unpack.doProcess(UnpackBuilder.java:113)
at
org.kitesdk.morphline.stdio.AbstractParser.doProcess(AbstractParser.java:96)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.stdlib.LogDebugBuilder$LogDebug.doProcess(LogDebugBuilder.java:58)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.kitesdk.morphline.stdlib.TryRulesBuilder$TryRules.doProcess(TryRulesBuilder.java:115)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.tika.DetectMimeTypeBuilder$DetectMimeType.doProcess(DetectMimeTypeBuilder.java:166)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.stdlib.SeparateAttachmentsBuilder$SeparateAttachments.doProcess(SeparateAttachmentsBuilder.java:79)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:181)
at
org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:156)
at
org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:208)
... 10 more
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]