[
https://issues.apache.org/jira/browse/HADOOP-14971?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16218955#comment-16218955
]
ASF GitHub Bot commented on HADOOP-14971:
-----------------------------------------
Github user steveloughran commented on a diff in the pull request:
https://github.com/apache/hadoop/pull/282#discussion_r146906771
--- Diff: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/Paths.java ---
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.commit.staging;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import com.google.common.base.Objects;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.PathIsDirectoryException;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import static org.apache.hadoop.fs.s3a.Constants.BUFFER_DIR;
+import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*;
+import static org.apache.hadoop.fs.s3a.commit.staging.StagingCommitterConstants.*;
+
+/**
+ * Path operations for the staging committers.
+ */
+public final class Paths {
+
+ private Paths() {
+ }
+
+ /**
+ * Insert the UUID to a path if it is not there already.
+ * If the file name (the part after the last slash) contains a ".", the
+ * UUID is inserted before the first "." with a "-" prefix; otherwise it
+ * is appended with a "-" prefix.
+ *
+ * Examples:
+ * <pre>
+ * /example/part-0000 ==> /example/part-0000-0ab34
+ * /example/part-0001.gz.csv ==> /example/part-0001-0ab34.gz.csv
+ * /example/part-0002-0abc3.gz.csv ==> /example/part-0002-0abc3.gz.csv
+ * /example0abc3/part-0002.gz.csv ==> /example0abc3/part-0002.gz.csv
+ * </pre>
+ *
+ *
+ * @param pathStr path as a string; must not have a trailing "/".
+ * @param uuid UUID to append; must not be empty
+ * @return new path.
+ */
+ public static String addUUID(String pathStr, String uuid) {
+ Preconditions.checkArgument(StringUtils.isNotEmpty(pathStr), "empty path");
+ Preconditions.checkArgument(StringUtils.isNotEmpty(uuid), "empty uuid");
+ // In some cases, Spark will add the UUID to the filename itself.
+ if (pathStr.contains(uuid)) {
+ return pathStr;
+ }
+
+ int dot; // location of the first '.' in the file name
+ int lastSlash = pathStr.lastIndexOf('/');
+ if (lastSlash >= 0) {
+ Preconditions.checkState(lastSlash + 1 < pathStr.length(),
+ "Bad path: " + pathStr);
+ dot = pathStr.indexOf('.', lastSlash);
+ } else {
+ dot = pathStr.indexOf('.');
+ }
+
+ if (dot >= 0) {
+ return pathStr.substring(0, dot) + "-" + uuid + pathStr.substring(dot);
+ } else {
+ return pathStr + "-" + uuid;
+ }
+ }
+
+ /**
+ * Get the parent path of a string path: everything up to but excluding
+ * the last "/" in the path.
+ * @param pathStr path as a string
+ * @return the parent or null if there is no parent.
+ */
+ public static String getParent(String pathStr) {
+ int lastSlash = pathStr.lastIndexOf('/');
+ if (lastSlash >= 0) {
+ return pathStr.substring(0, lastSlash);
+ }
+ return null;
+ }
+
+ /**
+ * Using {@code URI#relativize()}, build the relative path from the
+ * base path to the full path.
+ * If {@code childPath} is not a child of {@code basePath} the outcome
+ * is undefined.
+ * @param basePath base path
+ * @param childPath full path under the base path.
+ * @return the relative path
+ */
+ public static String getRelativePath(Path basePath,
+ Path childPath) {
+ //
+ // Use URI.create(Path#toString) to avoid URI character escape bugs
+ URI relative = URI.create(basePath.toString())
+ .relativize(URI.create(childPath.toString()));
+ return relative.getPath();
--- End diff --
where was fullPath created?
> Merge S3A committers into trunk
> -------------------------------
>
> Key: HADOOP-14971
> URL: https://issues.apache.org/jira/browse/HADOOP-14971
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: fs/s3
> Affects Versions: 3.0.0
> Reporter: Steve Loughran
> Assignee: Steve Loughran
>
> Merge the HADOOP-13786 committer into trunk. This branch is being set up as a
> github PR for review there & to keep it out of the mailboxes of the watchers on
> the main JIRA
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]