This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e41708  6355 don't lose authority portion of inputDirURI (#6359)
8e41708 is described below

commit 8e417087e1e98015c8dab1b52c37c240e2ffdb33
Author: Ken Krugler <[email protected]>
AuthorDate: Thu Dec 17 01:10:37 2020 -0800

    6355 don't lose authority portion of inputDirURI (#6359)
    
    * Add more files/dirs that appeared after top-level build & mvn eclipse
    
    * Fix dropping of authority & other pre-path elements from input URI
    
    * Fix up header for new test file
    
    * Fix up formatting to match Pinot standard
    
    * Add test for generation of output files to URIs with authority/userInfo
---
 .gitignore                                         |  2 +
 .../batch/common/SegmentGenerationUtilsTest.java   | 16 +++++++
 .../standalone/SegmentGenerationJobRunner.java     | 10 ++--
 .../standalone/SegmentGenerationJobRunnerTest.java | 54 ++++++++++++++++++++++
 .../src/test/resources/log4j2.xml                  | 35 ++++++++++++++
 5 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 192701d..1d0a8be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@ cscope.*
 .classpath
 .project
 .svn
+.externalToolBuilders/
+maven-eclipse.xml
 target/
 bin/
 */bin/
diff --git 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/test/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationUtilsTest.java
 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/test/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationUtilsTest.java
index aa65ffe..1753c95 100644
--- 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/test/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationUtilsTest.java
+++ 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/test/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationUtilsTest.java
@@ -22,6 +22,7 @@ package org.apache.pinot.plugin.ingestion.batch.common;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 import java.net.URI;
+import java.net.URISyntaxException;
 
 public class SegmentGenerationUtilsTest {
 
@@ -37,4 +38,19 @@ public class SegmentGenerationUtilsTest {
     
Assert.assertEquals(SegmentGenerationUtils.getFileName(URI.create("hdfs://var/data/myTable/2020/04/06/input.data")),
         "input.data");
   }
+  
+  // Confirm output path generation works with URIs that have 
authority/userInfo.
+  
+  @Test
+  public void testRelativeURIs() throws URISyntaxException {
+    URI inputDirURI = new URI("hdfs://namenode1:9999/path/to/");
+    URI inputFileURI = new URI("hdfs://namenode1:9999/path/to/subdir/file");
+    URI outputDirURI = new URI("hdfs://namenode2/output/dir/");
+    URI segmentTarFileName = new URI("file.tar.gz");
+    URI outputSegmentTarURI = SegmentGenerationUtils
+        .getRelativeOutputPath(inputDirURI, inputFileURI, 
outputDirURI).resolve(segmentTarFileName);
+    Assert.assertEquals(outputSegmentTarURI.toString(),
+        "hdfs://namenode2/output/dir/subdir/file.tar.gz");
+  }
+  
 }
diff --git 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
index fc2f9be..db1b11a 100644
--- 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
+++ 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
@@ -51,6 +51,8 @@ import org.apache.pinot.spi.utils.DataSizeUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.annotations.VisibleForTesting;
+
 
 public class SegmentGenerationJobRunner implements IngestionJobRunner {
 
@@ -172,7 +174,7 @@ public class SegmentGenerationJobRunner implements 
IngestionJobRunner {
       CountDownLatch segmentCreationTaskCountDownLatch = new 
CountDownLatch(numInputFiles);
       //iterate on the file list, for each
       for (int i = 0; i < numInputFiles; i++) {
-        final URI inputFileURI = getFileURI(filteredFiles.get(i), 
inputDirURI.getScheme());
+        final URI inputFileURI = getFileURI(filteredFiles.get(i), inputDirURI);
 
         //copy input path to local
         File localInputDataFile = new File(localInputTempDir, new 
File(inputFileURI.getPath()).getName());
@@ -244,11 +246,13 @@ public class SegmentGenerationJobRunner implements 
IngestionJobRunner {
     return uri;
   }
 
-  private URI getFileURI(String uriStr, String fallbackScheme)
+  @VisibleForTesting
+  protected static URI getFileURI(String uriStr, URI fullUriForPathOnlyUriStr)
       throws URISyntaxException {
     URI fileURI = URI.create(uriStr);
     if (fileURI.getScheme() == null) {
-      return new URI(fallbackScheme, fileURI.getSchemeSpecificPart(), 
fileURI.getFragment());
+      return new URI(fullUriForPathOnlyUriStr.getScheme(), 
fullUriForPathOnlyUriStr.getAuthority(),
+              fileURI.getPath(), fileURI.getQuery(), fileURI.getFragment());
     }
     return fileURI;
   }
diff --git 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunnerTest.java
 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunnerTest.java
new file mode 100644
index 0000000..802214b
--- /dev/null
+++ 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunnerTest.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.plugin.ingestion.batch.standalone;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+public class SegmentGenerationJobRunnerTest {
+
+  // Don't lose authority portion of inputDirURI when creating output files
+  // https://github.com/apache/incubator-pinot/issues/6355
+
+  @Test
+  public void testGetFileURI() throws Exception {
+    // Typical file URI
+    validateFileURI(new URI("file:/path/to/"));
+
+    // Namenode as authority, plus non-standard port
+    validateFileURI(new URI("hdfs://namenode:9999/path/to/"));
+
+    // S3 bucket + path
+    validateFileURI(new URI("s3://bucket/path/to/"));
+
+    // S3 URI with userInfo (username/password)
+    validateFileURI(new URI("s3://username:password@bucket/path/to/"));
+  }
+
+  private void validateFileURI(URI directoryURI) throws URISyntaxException {
+    URI fileURI = new URI(directoryURI.toString() + "file");
+    String rawPath = fileURI.getRawPath();
+
+    Assert.assertEquals(SegmentGenerationJobRunner.getFileURI(rawPath, 
fileURI).toString(),
+        fileURI.toString());
+
+  }
+}
diff --git 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/resources/log4j2.xml
 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/resources/log4j2.xml
new file mode 100644
index 0000000..4bbc67f
--- /dev/null
+++ 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/resources/log4j2.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<Configuration>
+  <Appenders>
+    <Console name="console" target="SYSTEM_OUT">
+      <PatternLayout>
+        <pattern>%d{HH:mm:ss.SSS} %c{1} - %m%n</pattern>
+      </PatternLayout>
+    </Console>
+  </Appenders>
+  <Loggers>
+    <AsyncRoot level="warn" additivity="false">
+      <AppenderRef ref="console" />
+    </AsyncRoot>
+  </Loggers>
+</Configuration>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to