This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 8e41708 6355 don't lose authority portion of inputDirURI (#6359)
8e41708 is described below
commit 8e417087e1e98015c8dab1b52c37c240e2ffdb33
Author: Ken Krugler <[email protected]>
AuthorDate: Thu Dec 17 01:10:37 2020 -0800
6355 don't lose authority portion of inputDirURI (#6359)
* Add more files/dirs that appeared after top-level build & mvn eclipse
* Fix dropping of authority & other pre-path elements from input URI
* Fix up header for new test file
* Fix up formatting to match Pinot standard
* Add test for generation of output files to URIs with authority/userInfo
---
.gitignore | 2 +
.../batch/common/SegmentGenerationUtilsTest.java | 16 +++++++
.../standalone/SegmentGenerationJobRunner.java | 10 ++--
.../standalone/SegmentGenerationJobRunnerTest.java | 54 ++++++++++++++++++++++
.../src/test/resources/log4j2.xml | 35 ++++++++++++++
5 files changed, 114 insertions(+), 3 deletions(-)
diff --git a/.gitignore b/.gitignore
index 192701d..1d0a8be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@ cscope.*
.classpath
.project
.svn
+.externalToolBuilders/
+maven-eclipse.xml
target/
bin/
*/bin/
diff --git
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/test/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationUtilsTest.java
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/test/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationUtilsTest.java
index aa65ffe..1753c95 100644
---
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/test/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationUtilsTest.java
+++
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/test/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationUtilsTest.java
@@ -22,6 +22,7 @@ package org.apache.pinot.plugin.ingestion.batch.common;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.net.URI;
+import java.net.URISyntaxException;
public class SegmentGenerationUtilsTest {
@@ -37,4 +38,19 @@ public class SegmentGenerationUtilsTest {
Assert.assertEquals(SegmentGenerationUtils.getFileName(URI.create("hdfs://var/data/myTable/2020/04/06/input.data")),
"input.data");
}
+
+ // Confirm output path generation works with URIs that have
authority/userInfo.
+
+ @Test
+ public void testRelativeURIs() throws URISyntaxException {
+ URI inputDirURI = new URI("hdfs://namenode1:9999/path/to/");
+ URI inputFileURI = new URI("hdfs://namenode1:9999/path/to/subdir/file");
+ URI outputDirURI = new URI("hdfs://namenode2/output/dir/");
+ URI segmentTarFileName = new URI("file.tar.gz");
+ URI outputSegmentTarURI = SegmentGenerationUtils
+ .getRelativeOutputPath(inputDirURI, inputFileURI,
outputDirURI).resolve(segmentTarFileName);
+ Assert.assertEquals(outputSegmentTarURI.toString(),
+ "hdfs://namenode2/output/dir/subdir/file.tar.gz");
+ }
+
}
diff --git
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
index fc2f9be..db1b11a 100644
---
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
+++
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
@@ -51,6 +51,8 @@ import org.apache.pinot.spi.utils.DataSizeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.annotations.VisibleForTesting;
+
public class SegmentGenerationJobRunner implements IngestionJobRunner {
@@ -172,7 +174,7 @@ public class SegmentGenerationJobRunner implements
IngestionJobRunner {
CountDownLatch segmentCreationTaskCountDownLatch = new
CountDownLatch(numInputFiles);
//iterate on the file list, for each
for (int i = 0; i < numInputFiles; i++) {
- final URI inputFileURI = getFileURI(filteredFiles.get(i),
inputDirURI.getScheme());
+ final URI inputFileURI = getFileURI(filteredFiles.get(i), inputDirURI);
//copy input path to local
File localInputDataFile = new File(localInputTempDir, new
File(inputFileURI.getPath()).getName());
@@ -244,11 +246,13 @@ public class SegmentGenerationJobRunner implements
IngestionJobRunner {
return uri;
}
- private URI getFileURI(String uriStr, String fallbackScheme)
+ @VisibleForTesting
+ protected static URI getFileURI(String uriStr, URI fullUriForPathOnlyUriStr)
throws URISyntaxException {
URI fileURI = URI.create(uriStr);
if (fileURI.getScheme() == null) {
- return new URI(fallbackScheme, fileURI.getSchemeSpecificPart(),
fileURI.getFragment());
+ return new URI(fullUriForPathOnlyUriStr.getScheme(),
fullUriForPathOnlyUriStr.getAuthority(),
+ fileURI.getPath(), fileURI.getQuery(), fileURI.getFragment());
}
return fileURI;
}
diff --git
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunnerTest.java
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunnerTest.java
new file mode 100644
index 0000000..802214b
--- /dev/null
+++
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunnerTest.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.plugin.ingestion.batch.standalone;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+public class SegmentGenerationJobRunnerTest {
+
+ // Don't lose authority portion of inputDirURI when creating output files
+ // https://github.com/apache/incubator-pinot/issues/6355
+
+ @Test
+ public void testGetFileURI() throws Exception {
+ // Typical file URI
+ validateFileURI(new URI("file:/path/to/"));
+
+ // Namenode as authority, plus non-standard port
+ validateFileURI(new URI("hdfs://namenode:9999/path/to/"));
+
+ // S3 bucket + path
+ validateFileURI(new URI("s3://bucket/path/to/"));
+
+ // S3 URI with userInfo (username/password)
+ validateFileURI(new URI("s3://username:password@bucket/path/to/"));
+ }
+
+ private void validateFileURI(URI directoryURI) throws URISyntaxException {
+ URI fileURI = new URI(directoryURI.toString() + "file");
+ String rawPath = fileURI.getRawPath();
+
+ Assert.assertEquals(SegmentGenerationJobRunner.getFileURI(rawPath,
fileURI).toString(),
+ fileURI.toString());
+
+ }
+}
diff --git
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/resources/log4j2.xml
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/resources/log4j2.xml
new file mode 100644
index 0000000..4bbc67f
--- /dev/null
+++
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/test/resources/log4j2.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<Configuration>
+ <Appenders>
+ <Console name="console" target="SYSTEM_OUT">
+ <PatternLayout>
+ <pattern>%d{HH:mm:ss.SSS} %c{1} - %m%n</pattern>
+ </PatternLayout>
+ </Console>
+ </Appenders>
+ <Loggers>
+ <AsyncRoot level="warn" additivity="false">
+ <AppenderRef ref="console" />
+ </AsyncRoot>
+ </Loggers>
+</Configuration>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]