[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16304573#comment-16304573 ] ASF GitHub Bot commented on SOLR-11622: --- Github user mrkarthik closed the pull request at: https://github.com/apache/lucene-solr/pull/273 > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Fix For: 7.3 > > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291959#comment-16291959 ] Karthik Ramachandran commented on SOLR-11622: - Thanks for creating the patch! > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291937#comment-16291937 ] Tim Allison commented on SOLR-11622: Turns out I did because you had done most of the work! :) See https://github.com/apache/lucene-solr/pull/291 over on SOLR-11701. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291516#comment-16291516 ] Karthik Ramachandran commented on SOLR-11622: - I don't mind, if you have time go ahead. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291483#comment-16291483 ] Tim Allison commented on SOLR-11622: [~kramachand...@commvault.com], if it is ok with you and if I have time, I'll try to submit a PR on SOLR-11701. If I don't have time, it will be all yours after you return. :) Sound good...or do you want the glory? For the last integration test I did, I put [these documents|https://github.com/apache/tika/tree/master/tika-parsers/src/test/resources/test-documents] in a directory and ran tika-app.jar against them. I then ran tika-eval.jar and counted the number of files without exceptions to get a ground truth count of how many files I'd expect to be in Solr. I then used DIH to import the same directory, with skip on error, and made sure there were the same # of documents in Solr. This uncovered several problems, which we'll fix in this issue or SOLR-11701. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291443#comment-16291443 ] Karthik Ramachandran commented on SOLR-11622: - [~talli...@mitre.org] i am going on vacation, will be back only after Christmas, I can work on it after that. After upgrade, do you have any specific test cases to run? I don't have much test documents or test cases for TIKA. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291138#comment-16291138 ] Tim Allison commented on SOLR-11622: Sorry, right, yes, please and thank you. The question is whether Karthik wants to do a comprehensive upgrade to Tika 1.17 PR or whether I should...either way, with you, [~erickerickson] as the reviewer+committer. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291129#comment-16291129 ] Erick Erickson commented on SOLR-11622: --- Make that Karthik should assign to me if that makes sense. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291128#comment-16291128 ] Tim Allison commented on SOLR-11622: Thank you [~erickerickson]! Y, SOLR-11701 with [~kramachand...@commvault.com]'s fixes here could be unified into one PR that would upgrade us to Tika 1.17 and would fix numerous dependency problems that I found when I finally did an integration test with Tika's test files [above|https://issues.apache.org/jira/browse/SOLR-11622?focusedCommentId=16277347=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-16277347]. This single PR would close out this issue, the SOLR-11693 and SOLR-11701 _and_ clean up problems I haven't even opened issues for (msaccess, and ...) [~kramachand...@commvault.com], would you like to have a go at SOLR-11701, plagiarizing my notes, or should I plagiarize your work for SOLR-11701. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291107#comment-16291107 ] Erick Erickson commented on SOLR-11622: --- Are you saying we'd wrap up all three of these in one go? Linked the other two to not lose the association if so. If someone else does the heavy lifting here I can be the committer fingers. I'll assign it to myself just to track, but if someone else wants to take I wouldn't object at all, especially someone more familiar with the dependency management. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291082#comment-16291082 ] Tim Allison commented on SOLR-11622: I'm not a committer on Lucene/Solr so I can't help. Sorry. Now that Tika 1.17 is out, it would be great to get that fully integrated, to include your fixes (SOLR-11701)...especially because this would fix a nasty regression that prevents pptx files with tables from getting indexed (SOLR-11693). [~shalinmangar] or [~thetaphi], if [~kramachand...@commvault.com] or I put together a PR for SOLR-11701, would you be willing to review and commit? This time, I'll run DIH against Tika's unit test documents before making the PR... > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16291042#comment-16291042 ] Karthik Ramachandran commented on SOLR-11622: - [~talli...@mitre.org] who can help us review and commit the change? can you? > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16278747#comment-16278747 ] ASF GitHub Bot commented on SOLR-11622: --- Github user mrkarthik commented on the issue: https://github.com/apache/lucene-solr/pull/282 I think notes on the other PR is good. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16278541#comment-16278541 ] ASF GitHub Bot commented on SOLR-11622: --- Github user tballison commented on the issue: https://github.com/apache/lucene-solr/pull/282 let me know if I should offer a comprehensive PR including your work on my own, or if my "notes" on your other PR are sufficient. Thank you! > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16277359#comment-16277359 ] Tim Allison commented on SOLR-11622: My {{ant-precommit}} had the usual build failure with broken links...So, I think we're good. :) > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16277347#comment-16277347 ] Tim Allison commented on SOLR-11622: Smh...that we haven't run Solr against Tika's test files before/recently. This would have surfaced SOLR-11693. Unit tests would not have found that, but a full integration test would have. :( Speaking of which, with ref to [this|https://issues.apache.org/jira/browse/SOLR-11622?focusedCommentId=16274648=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-16274648], I'm still getting the CTTable xsb error on our {{testPPT_various.pptx}}, and you can't just do a drop and replace POI-3.17-beta1 with POI-3.17, because there's a binary conflict on wmf files. That fix will require the upgrade to Tika 1.17, which should be on the way. I'm guessing that you aren't seeing that because of the luck of your classloader? > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16277332#comment-16277332 ] ASF GitHub Bot commented on SOLR-11622: --- Github user tballison commented on the issue: https://github.com/apache/lucene-solr/pull/273 [SOLR-11622-tallison.diff.txt](https://github.com/apache/lucene-solr/files/1528516/SOLR-11622-tallison.diff.txt) This is the full `git diff 83753d0..d2f40af > SOLR-11622-tallison.diff` Had to add jackcess-encrypt for msaccess, bcpkix for psd files, and rome-utils for atom/rss. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16277318#comment-16277318 ] ASF GitHub Bot commented on SOLR-11622: --- Github user mrkarthik commented on the issue: https://github.com/apache/lucene-solr/pull/273 I had local branch solr-11622 and origin had SOLR-11622, for some reason my branch is getting deleted. Sorry for the all these deletes. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16277306#comment-16277306 ] ASF GitHub Bot commented on SOLR-11622: --- Github user tballison commented on the issue: https://github.com/apache/lucene-solr/pull/273 This was the patch I had before running ant precommit, etc., which is still running. You'll need to run ant- clean-jars, jar-checksums and do the git add/rm before this will work. [SOLR-11622_tallison.patch.txt](https://github.com/apache/lucene-solr/files/1528484/SOLR-11622_tallison.patch.txt) > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16277283#comment-16277283 ] ASF GitHub Bot commented on SOLR-11622: --- GitHub user mrkarthik reopened a pull request: https://github.com/apache/lucene-solr/pull/273 SOLR-11622: Fix mime4j library dependency for Tika You can merge this pull request into a Git repository by running: $ git pull https://github.com/mrkarthik/lucene-solr jira/SOLR-11622 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/lucene-solr/pull/273.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #273 commit c5d4e37de782b2491b3e71cfbb004e5022b55f6b Author: Karthik RamachandranDate: 2017-11-14T00:21:44Z SOLR-11622: Fix mime4j library dependency for Tika commit 40b246b12e8fc6455e023d9d60b8edcfab9b184e Author: Karthik Ramachandran Date: 2017-12-01T22:12:15Z Merge remote-tracking branch 'upstream/master' into jira/solr-11622 commit 21f2ab483f356fad9b89233e544457a07540afd1 Author: Karthik Ramachandran Date: 2017-12-03T03:50:01Z SOLR-11622: Fix bundled mime4j library not sufficient for Tika requirement commit a40ca80ed7036732a332f5508589ae32eb4b Author: Karthik Ramachandran Date: 2017-12-04T15:33:18Z Merge remote-tracking branch 'upstream/master' into jira/solr-11622 > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16277232#comment-16277232 ] Tim Allison commented on SOLR-11622: Finished analysis. Will submit PR to against your branch shortly. Working on {{ant precommit}} now. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16277213#comment-16277213 ] ASF GitHub Bot commented on SOLR-11622: --- Github user mrkarthik closed the pull request at: https://github.com/apache/lucene-solr/pull/282 > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16276976#comment-16276976 ] Tim Allison commented on SOLR-11622: Will do. I'm finding some other things that need to be fixed as well. I have no idea why neither I nor anyone else (apparently?) has run DIH on Tika's test files (at least recently?!)... We've got to change this in our processes. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16276970#comment-16276970 ] ASF GitHub Bot commented on SOLR-11622: --- Github user mrkarthik closed the pull request at: https://github.com/apache/lucene-solr/pull/273 > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16276969#comment-16276969 ] ASF GitHub Bot commented on SOLR-11622: --- Github user mrkarthik commented on the issue: https://github.com/apache/lucene-solr/pull/273 For some reason this pull request is not getting updated, created new pull request https://github.com/apache/lucene-solr/pull/282 > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16275814#comment-16275814 ] Karthik Ramachandran commented on SOLR-11622: - Updated the patch. https://github.com/apache/lucene-solr/pull/282 > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch, SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16274994#comment-16274994 ] Tim Allison commented on SOLR-11622: There's still a clash with jdom triggered by rss files and rometools {noformat] Exception in thread "Thread-21" java.lang.NoClassDefFoundError: org/jdom2/input/JDOMParseException at com.rometools.rome.io.SyndFeedInput.(SyndFeedInput.java:63) at com.rometools.rome.io.SyndFeedInput.(SyndFeedInput.java:51) {noformat} I'm confirming that should be bumped to 2.0.4. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16274648#comment-16274648 ] Karthik Ramachandran commented on SOLR-11622: - I just now read the bug report, we are currently using 6.6.2 and we d'not see any issue with ppt extraction. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16274578#comment-16274578 ] Tim Allison commented on SOLR-11622: Taking a look now. I want to run all of Tika's unit test docs through it to make sure I didn't botch anything else... You saw the POI bug in SOLR-11693? > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16272790#comment-16272790 ] Karthik Ramachandran commented on SOLR-11622: - [~talli...@mitre.org] with this patch we were able to process EML files, can you review the changes? > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16269524#comment-16269524 ] Tim Allison commented on SOLR-11622: Y. This was my mistake/omission in SOLR-10335. Ugh. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16254695#comment-16254695 ] Yeo Zheng Lin commented on SOLR-11622: -- I am facing the same issue when indexing the EML files. java.lang.NoClassDefFoundError: org/apache/james/mime4j/stream/MimeConfig$Builder Must we wait for this patch to be ready before we can index the EML files to Solr 7.1.0? > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Assignee: Karthik Ramachandran >Priority: Minor > Labels: build > Attachments: SOLR-11622.patch > > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16250537#comment-16250537 ] ASF GitHub Bot commented on SOLR-11622: --- GitHub user mrkarthik opened a pull request: https://github.com/apache/lucene-solr/pull/273 SOLR-11622: Fix mime4j library dependency for Tika You can merge this pull request into a Git repository by running: $ git pull https://github.com/mrkarthik/lucene-solr jira/SOLR-11622 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/lucene-solr/pull/273.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #273 commit c5d4e37de782b2491b3e71cfbb004e5022b55f6b Author: Karthik RamachandranDate: 2017-11-14T00:21:44Z SOLR-11622: Fix mime4j library dependency for Tika > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Priority: Minor > Labels: build > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251)
[jira] [Commented] (SOLR-11622) Bundled mime4j library not sufficient for Tika requirement
[ https://issues.apache.org/jira/browse/SOLR-11622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16247372#comment-16247372 ] Advokat commented on SOLR-11622: We have the same problem. E-Mails saved in *.eml / *.mht format do not seem to work at all right now. > Bundled mime4j library not sufficient for Tika requirement > -- > > Key: SOLR-11622 > URL: https://issues.apache.org/jira/browse/SOLR-11622 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Build >Affects Versions: 7.1, 6.6.2 >Reporter: Karim Malhas >Priority: Minor > Labels: build > > The version 7.2 of Apache James Mime4j bundled with the Solr binary releases > does not match what is required by Apache Tika for parsing rfc2822 messages. > The master branch for james-mime4j seems to contain the missing Builder class > [https://github.com/apache/james-mime4j/blob/master/core/src/main/java/org/apache/james/mime4j/stream/MimeConfig.java > ] > This prevents import of rfc2822 formatted messages. For example like so: > {{./bin/post -c dovecot -type 'message/rfc822' 'testdata/email_01.txt' > }} > And results in the following stacktrace: > java.lang.NoClassDefFoundError: > org/apache/james/mime4j/stream/MimeConfig$Builder > at > org.apache.tika.parser.mail.RFC822Parser.parse(RFC822Parser.java:63) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136) > at >