autumnust commented on a change in pull request #3177:
URL: https://github.com/apache/incubator-gobblin/pull/3177#discussion_r545315929



##########
File path: 
gobblin-utility/src/main/java/org/apache/gobblin/util/ClustersNames.java
##########
@@ -97,21 +110,34 @@ public void addClusterMapping(URL clusterUrl, String 
clusterName) {
     this.urlToNameMap.put(clusterUrl.toString(), clusterName);
   }
 
-  // Strip out the port number if it is a valid URI
-  private static String normalizeClusterUrl(String clusterIdentifier) {
+  private static List<String> generateUrlMatchCandidates(String 
clusterIdentifier) {
+    ArrayList<String> candidates = new ArrayList<>();
+    candidates.add(clusterIdentifier);
+
     try {
       URI uri = new URI(clusterIdentifier.trim());
-      // URIs without protocol prefix
-      if (!uri.isOpaque() && null != uri.getHost()) {
-        clusterIdentifier = uri.getHost();
+      if (uri.getHost() != null) {
+        if (uri.getPort() != -1) {
+          candidates.add(uri.getHost() + ":" + uri.getPort());
+        }
+
+        candidates.add(uri.getHost());
+      } else if (uri.getScheme() != null && uri.getPath() != null) {
+        // we have a scheme and a path, but not the host name
+        // assuming local host
+        candidates.add("localhost");
       } else {
-        clusterIdentifier = uri.toString().replaceAll("[/:]"," 
").trim().replaceAll(" ", "_");
+        candidates.add(getNormalizedName(clusterIdentifier));
       }
     } catch (URISyntaxException e) {
-      //leave ID as is
+      candidates.add(getNormalizedName(clusterIdentifier));
     }
 
-    return clusterIdentifier;
+    return candidates;
+  }
+
+  private static String getNormalizedName(String clusterIdentifier) {

Review comment:
       I am not quite following the purpose for this. 

##########
File path: 
gobblin-utility/src/main/java/org/apache/gobblin/util/ClustersNames.java
##########
@@ -97,21 +110,34 @@ public void addClusterMapping(URL clusterUrl, String 
clusterName) {
     this.urlToNameMap.put(clusterUrl.toString(), clusterName);
   }
 
-  // Strip out the port number if it is a valid URI
-  private static String normalizeClusterUrl(String clusterIdentifier) {
+  private static List<String> generateUrlMatchCandidates(String 
clusterIdentifier) {
+    ArrayList<String> candidates = new ArrayList<>();
+    candidates.add(clusterIdentifier);
+
     try {
       URI uri = new URI(clusterIdentifier.trim());
-      // URIs without protocol prefix
-      if (!uri.isOpaque() && null != uri.getHost()) {
-        clusterIdentifier = uri.getHost();
+      if (uri.getHost() != null) {
+        if (uri.getPort() != -1) {
+          candidates.add(uri.getHost() + ":" + uri.getPort());
+        }
+
+        candidates.add(uri.getHost());

Review comment:
       I don't see any chance for host:port to be picked here, since getHost
will always be placed after that. Is this intentional? Or can you give a unit
test case that returns host:port?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to