Repository: tez
Updated Branches:
  refs/heads/master c96eed3e2 -> c924e8a25
TEZ-2397. Translation of LocalResources via Tez plan serialization can be lossy. (Siddharth Seth via hitesh)

Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/c924e8a2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/c924e8a2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/c924e8a2

Branch: refs/heads/master
Commit: c924e8a25d61be50e8c99f721108772b1c97c326
Parents: c96eed3
Author: Hitesh Shah <[email protected]>
Authored: Fri May 1 11:46:55 2015 -0700
Committer: Hitesh Shah <[email protected]>
Committed: Fri May 1 11:46:55 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/tez/dag/api/DagTypeConverters.java   | 25 ++++++++------
 .../tez/dag/api/TestDagTypeConverters.java      | 35 ++++++++++++++++++++
 3 files changed, 51 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tez/blob/c924e8a2/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index fe90418..7c718ed 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -332,6 +332,7 @@ TEZ-UI CHANGES (TEZ-8):
 Release 0.5.4: Unreleased
 
 ALL CHANGES:
+  TEZ-2397. Translation of LocalResources via Tez plan serialization can be lossy.
   TEZ-2221. VertexGroup name should be unqiue
   TEZ-1521. VertexDataMovementEventsGeneratedEvent may be logged twice in recovery log
   TEZ-2348. EOF exception during UnorderedKVReader.next().
http://git-wip-us.apache.org/repos/asf/tez/blob/c924e8a2/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java ---------------------------------------------------------------------- diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java b/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java index b4185b1..4dc7b38 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java @@ -19,6 +19,7 @@ package org.apache.tez.dag.api; import java.io.DataOutputStream; import java.io.IOException; +import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.HashMap; @@ -212,16 +213,20 @@ public class DagTypeConverters { return VertexLocationHint.create(outputList); } - // notes re HDFS URL handling: - // Resource URLs in the protobuf message are strings of the form hdfs://host:port/path - // org.apache.hadoop.fs.Path.Path is actually a URI type that allows any scheme - // org.apache.hadoop.yarn.api.records.URL is a URL type used by YARN. - // java.net.URL cannot be used out of the box as it rejects unknown schemes such as HDFS. 
- public static String convertToDAGPlan(URL resource) { - // see above notes on HDFS URL handling - return resource.getScheme() + "://" + resource.getHost() - + ":" + resource.getPort() + resource.getFile(); + Path p; + try { + p = ConverterUtils.getPathFromYarnURL(resource); + } catch (URISyntaxException e) { + throw new TezUncheckedException("Unable to translate resource: " + resource + " to Path"); + } + String urlString = p.toString(); + return urlString; + } + + public static URL convertToYarnURL(String pathString) { + Path path = new Path(pathString); + return ConverterUtils.getYarnUrlFromPath(path); } public static Map<String, LocalResource> createLocalResourceMapFromDAGPlan( @@ -235,7 +240,7 @@ public class DagTypeConverters { if(res.hasPattern()){ r.setPattern(res.getPattern()); } - r.setResource(ConverterUtils.getYarnUrlFromPath(new Path(res.getUri()))); // see above notes on HDFS URL handling + r.setResource(convertToYarnURL(res.getUri())); r.setSize(res.getSize()); r.setTimestamp(res.getTimeStamp()); r.setType(DagTypeConverters.convertFromDAGPlan(res.getType())); http://git-wip-us.apache.org/repos/asf/tez/blob/c924e8a2/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java ---------------------------------------------------------------------- diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java index 5d07eea..51b179a 100644 --- a/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java +++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java @@ -21,6 +21,10 @@ package org.apache.tez.dag.api; import java.io.IOException; import java.nio.ByteBuffer; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.dag.api.records.DAGProtos.TezEntityDescriptorProto; 
import org.junit.Assert; @@ -54,4 +58,35 @@ public class TestDagTypeConverters { Assert.assertEquals(historytext, actualHistoryText); } + @Test(timeout = 5000) + public void testYarnPathTranslation() { + // Without port + String p1String = "hdfs://mycluster/file"; + Path p1Path = new Path(p1String); + // Users would translate this via this mechanic. + URL lr1Url = ConverterUtils.getYarnUrlFromPath(p1Path); + // Serialize to dag plan. + String p1StringSerialized = DagTypeConverters.convertToDAGPlan(lr1Url); + // Deserialize + URL lr1UrlDeserialized = DagTypeConverters.convertToYarnURL(p1StringSerialized); + Assert.assertEquals("mycluster", lr1UrlDeserialized.getHost()); + Assert.assertEquals("/file", lr1UrlDeserialized.getFile()); + Assert.assertEquals("hdfs", lr1UrlDeserialized.getScheme()); + + + // With port + String p2String = "hdfs://mycluster:2311/file"; + Path p2Path = new Path(p2String); + // Users would translate this via this mechanic. + URL lr2Url = ConverterUtils.getYarnUrlFromPath(p2Path); + // Serialize to dag plan. + String p2StringSerialized = DagTypeConverters.convertToDAGPlan(lr2Url); + // Deserialize + URL lr2UrlDeserialized = DagTypeConverters.convertToYarnURL(p2StringSerialized); + Assert.assertEquals("mycluster", lr2UrlDeserialized.getHost()); + Assert.assertEquals("/file", lr2UrlDeserialized.getFile()); + Assert.assertEquals("hdfs", lr2UrlDeserialized.getScheme()); + Assert.assertEquals(2311, lr2UrlDeserialized.getPort()); + } + }
