This is an automated email from the ASF dual-hosted git repository.

benjobs pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-streampark.git


The following commit(s) were added to refs/heads/dev by this push:
     new d2a10d604 [Improve] Streampark-common module tika dependency moved to 
the streampark-console module (#3171)
d2a10d604 is described below

commit d2a10d604585e521268691a6df6ab0a201fdf94c
Author: ChengJie1053 <[email protected]>
AuthorDate: Sun Sep 24 10:54:13 2023 +0800

    [Improve] Streampark-common module tika dependency moved to the 
streampark-console module (#3171)
    
    * [Improve] Streampark-common module tika dependency moved to the 
streampark-console module
    
    * Modify UploadFileTypeInterceptor
    
    * Modify isPythonFileType
---
 dist-material/release-docs/LICENSE                 |  1 +
 streampark-common/pom.xml                          |  7 -----
 .../apache/streampark/common/util/FileUtils.scala  | 20 -------------
 .../streampark-console-service/pom.xml             |  7 +++++
 .../interceptor/UploadFileTypeInterceptor.java     | 35 ++++++++++++++++++++--
 5 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/dist-material/release-docs/LICENSE 
b/dist-material/release-docs/LICENSE
index b6188637f..d8d863007 100644
--- a/dist-material/release-docs/LICENSE
+++ b/dist-material/release-docs/LICENSE
@@ -433,6 +433,7 @@ The text of each license is the standard Apache 2.0 
license. https://www.apache.
     https://mvnrepository.com/artifact/org.apache.shiro/shiro-web/1.10.0 
Apache-2.0
     
https://mvnrepository.com/artifact/org.apache.tomcat.embed/tomcat-embed-el/9.0.65
 Apache-2.0
     https://mvnrepository.com/artifact/org.apache.zookeeper/zookeeper/3.4.6 
Apache-2.0
+    https://mvnrepository.com/artifact/org.apache.tika/tika-core/1.20 
Apache-2.0
     
https://mvnrepository.com/artifact/org.asynchttpclient/async-http-client/2.12.3 
Apache-2.0
     
https://mvnrepository.com/artifact/org.asynchttpclient/async-http-client-netty-utils/2.12.3
 Apache-2.0
     
https://mvnrepository.com/artifact/org.codehaus.jackson/jackson-core-asl/1.9.13 
Apache-2.0
diff --git a/streampark-common/pom.xml b/streampark-common/pom.xml
index 3214107ee..32babf65c 100644
--- a/streampark-common/pom.xml
+++ b/streampark-common/pom.xml
@@ -156,13 +156,6 @@
             <optional>true</optional>
         </dependency>
 
-        <!--tika -->
-        <dependency>
-            <groupId>org.apache.tika</groupId>
-            <artifactId>tika-core</artifactId>
-            <version>1.20</version>
-        </dependency>
-
     </dependencies>
 
     <build>
diff --git 
a/streampark-common/src/main/scala/org/apache/streampark/common/util/FileUtils.scala
 
b/streampark-common/src/main/scala/org/apache/streampark/common/util/FileUtils.scala
index 6ebad0699..6b95884bc 100644
--- 
a/streampark-common/src/main/scala/org/apache/streampark/common/util/FileUtils.scala
+++ 
b/streampark-common/src/main/scala/org/apache/streampark/common/util/FileUtils.scala
@@ -18,12 +18,6 @@ package org.apache.streampark.common.util
 
 import org.apache.streampark.common.util.ImplicitsUtils._
 
-import org.apache.commons.lang3.StringUtils
-import org.apache.tika.metadata.{HttpHeaders, Metadata}
-import org.apache.tika.mime.MediaType
-import org.apache.tika.parser.{AutoDetectParser, ParseContext}
-import org.xml.sax.helpers.DefaultHandler
-
 import java.io._
 import java.net.URL
 import java.nio.ByteBuffer
@@ -64,20 +58,6 @@ object FileUtils {
       }) == "504B0304"
   }
 
-  def isPythonFileType(contentType: String, input: InputStream): Boolean = {
-    if (StringUtils.isBlank(contentType) || input == null) {
-      throw new RuntimeException("The contentType or inputStream can not be 
null")
-    }
-    getMimeType(input) == MediaType.TEXT_PLAIN.toString && 
contentType.contains("text/x-python")
-  }
-
-  def getMimeType(stream: InputStream): String = {
-    val metadata: Metadata = new Metadata
-    val parser: AutoDetectParser = new AutoDetectParser
-    parser.parse(stream, new DefaultHandler, metadata, new ParseContext)
-    metadata.get(HttpHeaders.CONTENT_TYPE)
-  }
-
   def isJarFileType(file: File): Boolean = {
     if (!file.exists || !file.isFile) {
       throw new RuntimeException("The file does not exist or the path is a 
directory")
diff --git a/streampark-console/streampark-console-service/pom.xml 
b/streampark-console/streampark-console-service/pom.xml
index 420ac6fd5..cf3b7c4d2 100644
--- a/streampark-console/streampark-console-service/pom.xml
+++ b/streampark-console/streampark-console-service/pom.xml
@@ -442,6 +442,13 @@
             <artifactId>log4j-over-slf4j</artifactId>
         </dependency>
 
+        <!--tika -->
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>1.20</version>
+        </dependency>
+
         <!-- logback -->
         <dependency>
             <groupId>ch.qos.logback</groupId>
diff --git 
a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/base/interceptor/UploadFileTypeInterceptor.java
 
b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/base/interceptor/UploadFileTypeInterceptor.java
index 169c8f457..b4a0483c2 100644
--- 
a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/base/interceptor/UploadFileTypeInterceptor.java
+++ 
b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/base/interceptor/UploadFileTypeInterceptor.java
@@ -20,20 +20,33 @@ package org.apache.streampark.console.base.interceptor;
 import org.apache.streampark.common.util.FileUtils;
 import org.apache.streampark.console.base.exception.ApiAlertException;
 
+import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.metadata.HttpHeaders;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.springframework.stereotype.Component;
 import org.springframework.web.multipart.MultipartFile;
 import org.springframework.web.multipart.MultipartHttpServletRequest;
 import org.springframework.web.servlet.HandlerInterceptor;
 import org.springframework.web.servlet.ModelAndView;
+import org.xml.sax.helpers.DefaultHandler;
 
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
 
+import java.io.InputStream;
 import java.util.Map;
 
 @Component
 public class UploadFileTypeInterceptor implements HandlerInterceptor {
 
+  private static final Logger logger = 
LoggerFactory.getLogger(UploadFileTypeInterceptor.class);
+
   @Override
   public boolean preHandle(HttpServletRequest request, HttpServletResponse 
response, Object handler)
       throws Exception {
@@ -46,16 +59,32 @@ public class UploadFileTypeInterceptor implements 
HandlerInterceptor {
             multipartFile, "File to upload can't be null. Upload file 
failed.");
         boolean isJarOrPyFile =
             FileUtils.isJarFileType(multipartFile.getInputStream())
-                || FileUtils.isPythonFileType(
-                    multipartFile.getContentType(), 
multipartFile.getInputStream());
+                || isPythonFileType(multipartFile.getContentType(), 
multipartFile.getInputStream());
         ApiAlertException.throwIfFalse(
             isJarOrPyFile,
-            "Illegal file type, Only support standard jar or py files. Upload 
file failed.");
+            "Illegal file type, Only support standard jar files. Upload file 
failed.");
       }
     }
     return true;
   }
 
+  private boolean isPythonFileType(String contentType, InputStream input) {
+    if (StringUtils.isBlank(contentType) || input == null) {
+      throw new RuntimeException("The contentType or inputStream can not be 
null");
+    }
+    try { // parse the stream so Tika records the detected Content-Type
+      Metadata metadata = new Metadata();
+      AutoDetectParser parser = new AutoDetectParser();
+      parser.parse(input, new DefaultHandler(), metadata, new ParseContext());
+      String mimeType = metadata.get(HttpHeaders.CONTENT_TYPE);
+      return contentType.contains("text/x-python")
+          && MediaType.TEXT_PLAIN.toString().equals(mimeType);
+    } catch (Exception e) { // a failed detection is reported as "not Python"
+      logger.warn("MimeType parse failed", e);
+      return false;
+    }
+  }
+
   @Override
   public void postHandle(
       HttpServletRequest request,

Reply via email to