[
https://issues.apache.org/jira/browse/TIKA-1735?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17536292#comment-17536292
]
ASF GitHub Bot commented on TIKA-1735:
--------------------------------------
monkmachine commented on code in PR #558:
URL: https://github.com/apache/tika/pull/558#discussion_r871707028
##########
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGReadParser.java:
##########
@@ -0,0 +1,208 @@
+package org.apache.tika.parser.dwg;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.function.Consumer;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.core.json.JsonReadFeature;
+
+
+public class DWGReadParser extends AbstractDWGParser {
+ private static final Logger LOG = LoggerFactory.getLogger(DWGParser.class);
+ /**
+ *
+ */
+ private static final long serialVersionUID = 7983127145030096837L;
+ private static MediaType TYPE = MediaType.image("vnd.dwg");
+
+ public Set < MediaType > getSupportedTypes(ParseContext context) {
+ return Collections.singleton(TYPE);
+ }
+
+ @Override
+ public void parse(InputStream stream, ContentHandler handler, Metadata
metadata, ParseContext context)
+ throws IOException, SAXException, TikaException {
+
+ configure(context);
+ DWGParserConfig dwgc = context.get(DWGParserConfig.class);
+ final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
metadata);
+
+ xhtml.startDocument();
+ UUID uuid = UUID.randomUUID();
+ File tmpFileOut = File.createTempFile(uuid + "dwgreadout", ".json");
+ File tmpFileOutCleaned = File.createTempFile(uuid + "dwgreadoutclean",
".json");
+ File tmpFileIn = File.createTempFile(uuid + "dwgreadin", ".dwg");
+ try {
+
+ FileUtils.copyInputStreamToFile(stream, tmpFileIn);
+
+ List < String > command =
Arrays.asList(dwgc.getDwgReadExecutable(), "-O", "JSON", "-o",
Review Comment:
@tballison I added ProcessUtils.execute instead of using ProcessBuilder
manually, and also added a timeout to the DWGParserConfig (default 5 minutes,
as I have found that some files do take some time).
> Unsupported AutoCAD drawing version: AC1027
> -------------------------------------------
>
> Key: TIKA-1735
> URL: https://issues.apache.org/jira/browse/TIKA-1735
> Project: Tika
> Issue Type: Bug
> Reporter: Luca Perico
> Priority: Major
> Attachments: testDWG-AC1027.dwg
>
>
> When trying to index a .dwg file (version AC1027), I get a 500 error response.
> "<?xml version=""1.0"" encoding=""UTF-8""?>
> <response>
> <lst name=""responseHeader""><int name=""status"">500</int><int
> name=""QTime"">3</int></lst><lst name=""error""><str name=""msg"">Unsupported
> AutoCAD drawing version: AC1027</str><str
> name=""trace"">org.apache.solr.common.SolrException:
> org.apache.tika.exception.TikaException: Unsupported AutoCAD drawing version:
> AC1027
> at
> org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:227)
> at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
> at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:143)
> at org.apache.solr.core.SolrCore.execute(SolrCore.java:2064)
> at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:654)
> at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:450)
> at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:227)
> at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:196)
> at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1652)
> at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:585)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
> at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:577)
> at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:223)
> at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1127)
> at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:515)
> at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)
> at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1061)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
> at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:215)
> at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:110)
> at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97)
> at org.eclipse.jetty.server.Server.handle(Server.java:497)
> at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:310)
> at
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:257)
> at
> org.eclipse.jetty.io.AbstractConnection$2.run(AbstractConnection.java:540)
> at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:635)
> at
> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:555)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.tika.exception.TikaException: Unsupported AutoCAD
> drawing version: AC1027
> at org.apache.tika.parser.dwg.DWGParser.parse(DWGParser.java:131)
> at
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:256)
> at
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:256)
> at
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
> at
> org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:221)
> ... 27 more
> </str><int name=""code"">500</int></lst>
> </response>"
--
This message was sent by Atlassian Jira
(v8.20.7#820007)