Hisoka-X commented on code in PR #7278:
URL: https://github.com/apache/seatunnel/pull/7278#discussion_r1694629810
##########
seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java:
##########
@@ -427,20 +429,12 @@ public boolean accept(File pathname) {
pathname.getName(),
pluginJarPrefix);
}
});
- if (ArrayUtils.isEmpty(targetPluginFiles)) {
+ if (ArrayUtils.isEmpty(targetPluginFiles) || targetPluginFiles ==
null) {
return Optional.empty();
}
- if (targetPluginFiles.length > 1) {
- throw new IllegalArgumentException(
- "Found multiple plugin jar: "
- + Arrays.stream(targetPluginFiles)
- .map(File::getPath)
- .collect(Collectors.joining(","))
- + " for pluginIdentifier: "
- + pluginIdentifier);
- }
try {
- URL pluginJarPath = targetPluginFiles[0].toURI().toURL();
+ URL pluginJarPath =
+ findMostSimlarPluginJarFile(targetPluginFiles,
pluginJarPrefix).toURI().toURL();
Review Comment:
```suggestion
if (targetPluginFiles.length == 1) {
URL pluginJarPath = targetPluginFiles[0].toURI().toURL();
} else {
URL pluginJarPath =
findMostSimlarPluginJarFile(targetPluginFiles,
pluginJarPrefix).toURI().toURL();
}
```
##########
seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java:
##########
@@ -451,4 +445,105 @@ public boolean accept(File pathname) {
return Optional.empty();
}
}
+
+ private static File findMostSimlarPluginJarFile(
+ File[] targetPluginFiles, String pluginJarPrefix) {
+ String splitRegex = "\\-|\\_|\\.";
+ double maxSimlarity = -Integer.MAX_VALUE;
+ int mostSimlarPluginJarFileIndex = -1;
+ for (int i = 0; i < targetPluginFiles.length; i++) {
+ File file = targetPluginFiles[i];
+ String fileName = file.getName();
+ double similarity =
+ CosineSimilarityUtil.cosineSimilarity(pluginJarPrefix,
fileName, splitRegex);
+ if (similarity > maxSimlarity) {
+ maxSimlarity = similarity;
+ mostSimlarPluginJarFileIndex = i;
+ }
+ }
+ return targetPluginFiles[mostSimlarPluginJarFileIndex];
+ }
+
+ static class CosineSimilarityUtil {
+ public static double cosineSimilarity(String textA, String textB,
String splitRegrex) {
+ Set<String> words1 =
+ new
HashSet<>(Arrays.asList(textA.toLowerCase().split(splitRegrex)));
+ Set<String> words2 =
+ new
HashSet<>(Arrays.asList(textB.toLowerCase().split(splitRegrex)));
+ int[] termFrequency1 = calculateTermFrequencyVector(textA, words1,
splitRegrex);
+ int[] termFrequency2 = calculateTermFrequencyVector(textB, words2,
splitRegrex);
+ return calculateCosineSimilarity(termFrequency1, termFrequency2);
+ }
+
+ public static int[] calculateTermFrequencyVector(
Review Comment:
```suggestion
private static int[] calculateTermFrequencyVector(
```
##########
seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java:
##########
@@ -427,20 +429,12 @@ public boolean accept(File pathname) {
pathname.getName(),
pluginJarPrefix);
}
});
- if (ArrayUtils.isEmpty(targetPluginFiles)) {
+ if (ArrayUtils.isEmpty(targetPluginFiles) || targetPluginFiles ==
null) {
Review Comment:
```suggestion
if (ArrayUtils.isEmpty(targetPluginFiles)) {
```
`ArrayUtils.isEmpty` already includes a null check, so the additional `targetPluginFiles == null` condition is redundant.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]