This is an automated email from the ASF dual-hosted git repository.

nixon pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git

commit 845017b1a07af0a2682d0baff660b65cc637f963
Author: Nikhil Bonte <[email protected]>
AuthorDate: Tue Jun 30 18:15:24 2020 +0530

    ATLAS-3871 Add unit tests to path extractor utility for s3, s3 v2, adls path entities
    
    Signed-off-by: nixonrodrigues <[email protected]>
    (cherry picked from commit e72aa1076a5a1272935fed4dedaf9e29e41fa30b)
---
 .../atlas/utils/AtlasPathExtractorUtilTest.java    | 285 ++++++++++++++++++---
 1 file changed, 252 insertions(+), 33 deletions(-)

diff --git a/common/src/test/java/org/apache/atlas/utils/AtlasPathExtractorUtilTest.java b/common/src/test/java/org/apache/atlas/utils/AtlasPathExtractorUtilTest.java
index 664bfb7..4abdca4 100644
--- a/common/src/test/java/org/apache/atlas/utils/AtlasPathExtractorUtilTest.java
+++ b/common/src/test/java/org/apache/atlas/utils/AtlasPathExtractorUtilTest.java
@@ -22,6 +22,7 @@ import org.apache.atlas.model.instance.AtlasEntity;
 import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 import org.apache.hadoop.fs.Path;
 
@@ -53,56 +54,68 @@ public class AtlasPathExtractorUtilTest {
     private static final String OZONE_KEY         = "ozone_key";
     private static final String OZONE_SCHEME      = "ofs" + SCHEME_SEPARATOR;
     private static final String OZONE_3_SCHEME    = "o3fs" + SCHEME_SEPARATOR;
-    private static final String OZONE_PATH        = OZONE_SCHEME + 
"bucket1.volume1.ozone1/files/file.txt";
-    private static final String OZONE_3_PATH      = OZONE_3_SCHEME + 
"bucket1.volume1.ozone1/files/file.txt";
 
     // HDFS
     private static final String HDFS_SCHEME    = "hdfs" + SCHEME_SEPARATOR;
     private static final String HDFS_PATH      = HDFS_SCHEME + 
"host_name:8020/warehouse/tablespace/external/hive/taBlE_306";
 
-    @Test
-    public void testGetPathEntityOzone3Path() {
-        PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE);
-
-        Path path = new Path(OZONE_3_PATH);
-        AtlasEntityWithExtInfo entityWithExtInfo = 
AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
-        AtlasEntity entity = entityWithExtInfo.getEntity();
-
-        assertNotNull(entity);
-        assertEquals(entity.getTypeName(), OZONE_KEY);
-        verifyOzoneKeyEntity(OZONE_3_PATH, entity);
-
-        assertEquals(entityWithExtInfo.getReferredEntities().size(), 2);
-        verifyOzoneEntities(OZONE_3_SCHEME, OZONE_3_PATH, 
extractorContext.getKnownEntities());
-
-        assertEquals(extractorContext.getKnownEntities().size(), 3);
-        verifyOzoneEntities(OZONE_3_SCHEME, OZONE_3_PATH, 
extractorContext.getKnownEntities());
+    // ADLS Gen2
+    private static final String ADLS_GEN2_ACCOUNT       = "adls_gen2_account";
+    private static final String ADLS_GEN2_CONTAINER     = 
"adls_gen2_container";
+    private static final String ADLS_GEN2_DIRECTORY     = 
"adls_gen2_directory";
+    private static final String ABFS_SCHEME             = "abfs" + 
SCHEME_SEPARATOR;
+    private static final String ABFSS_SCHEME            = "abfss" + 
SCHEME_SEPARATOR;
+    private static final String ABFS_PATH               = ABFS_SCHEME + 
"data@razrangersan.dfs.core.windows.net/tmp/cdp-demo/sample.csv";
+    private static final String ABFSS_PATH              = ABFSS_SCHEME + 
"data@razrangersan.dfs.core.windows.net/tmp/cdp-demo/sample.csv";
+
+    // AWS S3
+    private static final String AWS_S3_ATLAS_MODEL_VERSION_V2    = "V2";
+    private static final String AWS_S3_BUCKET                    = 
"aws_s3_bucket";
+    private static final String AWS_S3_PSEUDO_DIR                = 
"aws_s3_pseudo_dir";
+    private static final String AWS_S3_V2_BUCKET                 = 
"aws_s3_v2_bucket";
+    private static final String AWS_S3_V2_PSEUDO_DIR             = 
"aws_s3_v2_directory";
+    private static final String S3_SCHEME                        = "s3" + 
SCHEME_SEPARATOR;
+    private static final String S3A_SCHEME                       = "s3a" + 
SCHEME_SEPARATOR;
+    private static final String ATTRIBUTE_OBJECT_PREFIX          = 
"objectPrefix";
+    private static final String S3_PATH                          = S3_SCHEME + 
"aws_my_bucket1/1234567890/renders/Irradiance_A.csv";
+    private static final String S3A_PATH                         = S3A_SCHEME 
+ "aws_my_bucket1/1234567890/renders/Irradiance_A.csv";
+
+    @DataProvider(name = "ozonePathProvider")
+    private Object[][] ozonePathProvider(){
+        return new Object[][]{
+                { OZONE_SCHEME, "bucket1.volume1.ozone1/files/file.txt", 
"/files/file.txt" },
+                { OZONE_SCHEME, "bucket1.volume1.ozone1/file21.txt", 
"/file21.txt" },
+                { OZONE_SCHEME, "bucket1.volume1.ozone1/quarter_one/sales", 
"/quarter_one/sales" },
+                { OZONE_SCHEME, "bucket1.volume1.ozone1/quarter_one/sales/", 
"/quarter_one/sales" },
+                { OZONE_3_SCHEME, "bucket1.volume1.ozone1/files/file.txt", 
"/files/file.txt" },
+                { OZONE_3_SCHEME, "bucket1.volume1.ozone1/file21.txt", 
"/file21.txt"},
+                { OZONE_3_SCHEME, "bucket1.volume1.ozone1/quarter_one/sales", 
"/quarter_one/sales" },
+                { OZONE_3_SCHEME, "bucket1.volume1.ozone1/quarter_one/sales/", 
"/quarter_one/sales" },
+        };
     }
 
-    @Test
-    public void testGetPathEntityOzonePath() {
+    @Test(dataProvider = "ozonePathProvider")
+    public void testGetPathEntityOzone3Path(String scheme, String location, 
String keyName) {
+        String ozonePath = scheme + location;
         PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE);
 
-        Path path = new Path(OZONE_PATH);
+        Path path = new Path(ozonePath);
         AtlasEntityWithExtInfo entityWithExtInfo = 
AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
         AtlasEntity entity = entityWithExtInfo.getEntity();
 
         assertNotNull(entity);
         assertEquals(entity.getTypeName(), OZONE_KEY);
-        verifyOzoneKeyEntity(OZONE_PATH, entity);
+        verifyOzoneKeyEntity(ozonePath, keyName, entity);
 
         assertEquals(entityWithExtInfo.getReferredEntities().size(), 2);
-        verifyOzoneEntities(OZONE_SCHEME, OZONE_PATH, 
extractorContext.getKnownEntities());
+        verifyOzoneEntities(scheme, ozonePath, keyName, 
entityWithExtInfo.getReferredEntities());
 
         assertEquals(extractorContext.getKnownEntities().size(), 3);
-        verifyOzoneEntities(OZONE_SCHEME, OZONE_PATH, 
extractorContext.getKnownEntities());
+        verifyOzoneEntities(scheme, ozonePath, keyName, 
extractorContext.getKnownEntities());
     }
 
     @Test
     public void testGetPathEntityHdfsPath() {
-        Map<String, AtlasEntity> knownEntities = new HashMap<>();
-        AtlasEntityWithExtInfo extInfo = new AtlasEntityWithExtInfo();
-
         PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE);
 
         Path path = new Path(HDFS_PATH);
@@ -113,7 +126,7 @@ public class AtlasPathExtractorUtilTest {
         assertEquals(entity.getTypeName(), HDFS_PATH_TYPE);
         verifyHDFSEntity(entity, false);
 
-        assertNull(extInfo.getReferredEntities());
+        assertNull(entityWithExtInfo.getReferredEntities());
         assertEquals(extractorContext.getKnownEntities().size(), 1);
         extractorContext.getKnownEntities().values().forEach(x -> 
verifyHDFSEntity(x, false));
     }
@@ -135,11 +148,107 @@ public class AtlasPathExtractorUtilTest {
         extractorContext.getKnownEntities().values().forEach(x -> 
verifyHDFSEntity(x, true));
     }
 
-    private void verifyOzoneEntities(String scheme, String path, Map<String, 
AtlasEntity> knownEntities) {
+    @Test
+    public void testGetPathEntityABFSPath() {
+        PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE);
+
+        Path path = new Path(ABFS_PATH);
+        AtlasEntityWithExtInfo entityWithExtInfo = 
AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
+        AtlasEntity entity = entityWithExtInfo.getEntity();
+
+        assertNotNull(entity);
+        assertEquals(entity.getTypeName(), ADLS_GEN2_DIRECTORY);
+        assertEquals(entityWithExtInfo.getReferredEntities().size(), 2);
+
+        verifyABFSAdlsGen2Dir(ABFS_SCHEME, ABFS_PATH, entity);
+        verifyABFSKnownEntities(ABFS_SCHEME, ABFS_PATH, 
extractorContext.getKnownEntities());
+    }
+
+    @Test
+    public void testGetPathEntityABFSSPath() {
+        PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE);
+
+        Path path = new Path(ABFSS_PATH);
+        AtlasEntityWithExtInfo entityWithExtInfo = 
AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
+        AtlasEntity entity = entityWithExtInfo.getEntity();
+
+        assertNotNull(entity);
+        assertEquals(entity.getTypeName(), ADLS_GEN2_DIRECTORY);
+        assertEquals(entityWithExtInfo.getReferredEntities().size(), 2);
+
+        verifyABFSAdlsGen2Dir(ABFSS_SCHEME, ABFSS_PATH, entity);
+        verifyABFSKnownEntities(ABFSS_SCHEME, ABFSS_PATH, 
extractorContext.getKnownEntities());
+    }
+
+    @Test
+    public void testGetPathEntityS3V2Path() {
+        PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE, AWS_S3_ATLAS_MODEL_VERSION_V2);
+
+        Path path = new Path(S3_PATH);
+        AtlasEntityWithExtInfo entityWithExtInfo = 
AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
+        AtlasEntity entity = entityWithExtInfo.getEntity();
+
+        assertNotNull(entity);
+        assertEquals(entity.getTypeName(), AWS_S3_V2_PSEUDO_DIR);
+        assertEquals(entityWithExtInfo.getReferredEntities().size(), 1);
+
+        verifyS3V2PseudoDir(S3A_SCHEME, S3_PATH, entity);
+        verifyS3V2KnownEntities(S3_SCHEME, S3_PATH, 
extractorContext.getKnownEntities());
+    }
+
+    @Test
+    public void testGetPathEntityS3AV2Path() {
+        PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE, AWS_S3_ATLAS_MODEL_VERSION_V2);
+
+        Path path = new Path(S3A_PATH);
+        AtlasEntityWithExtInfo entityWithExtInfo = 
AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
+        AtlasEntity entity = entityWithExtInfo.getEntity();
+
+        assertNotNull(entity);
+        assertEquals(entity.getTypeName(), AWS_S3_V2_PSEUDO_DIR);
+        assertEquals(entityWithExtInfo.getReferredEntities().size(), 1);
+
+        verifyS3V2PseudoDir(S3A_SCHEME, S3A_PATH, entity);
+        verifyS3V2KnownEntities(S3A_SCHEME, S3A_PATH, 
extractorContext.getKnownEntities());
+    }
+
+    @Test
+    public void testGetPathEntityS3Path() {
+        PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE);
+
+        Path path = new Path(S3_PATH);
+        AtlasEntityWithExtInfo entityWithExtInfo = 
AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
+        AtlasEntity entity = entityWithExtInfo.getEntity();
+
+        assertNotNull(entity);
+        assertEquals(entity.getTypeName(), AWS_S3_PSEUDO_DIR);
+        assertEquals(entityWithExtInfo.getReferredEntities().size(), 1);
+
+        verifyS3PseudoDir(S3_PATH, entity);
+        verifyS3KnownEntities(S3_SCHEME, S3_PATH, 
extractorContext.getKnownEntities());
+    }
+
+    @Test
+    public void testGetPathEntityS3APath() {
+        PathExtractorContext extractorContext = new 
PathExtractorContext(METADATA_NAMESPACE);
+
+        Path path = new Path(S3A_PATH);
+        AtlasEntityWithExtInfo entityWithExtInfo = 
AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
+        AtlasEntity entity = entityWithExtInfo.getEntity();
+
+        assertNotNull(entity);
+        assertEquals(entity.getTypeName(), AWS_S3_PSEUDO_DIR);
+        assertEquals(entityWithExtInfo.getReferredEntities().size(), 1);
+
+        verifyS3PseudoDir(S3A_PATH, entity);
+        verifyS3KnownEntities(S3A_SCHEME, S3A_PATH, 
extractorContext.getKnownEntities());
+    }
+
+    private void verifyOzoneEntities(String scheme, String path, String 
keyName, Map<String, AtlasEntity> knownEntities) {
         for (AtlasEntity knownEntity : knownEntities.values()) {
             switch (knownEntity.getTypeName()){
                 case OZONE_KEY:
-                    verifyOzoneKeyEntity(path, knownEntity);
+                    verifyOzoneKeyEntity(path, keyName, knownEntity);
                     break;
 
                 case OZONE_VOLUME:
@@ -155,9 +264,11 @@ public class AtlasPathExtractorUtilTest {
         }
     }
 
-    private void verifyOzoneKeyEntity(String path, AtlasEntity entity) {
+    private void verifyOzoneKeyEntity(String path, String name, AtlasEntity 
entity) {
+        //remove trailing "/" if present from path
+        path = (path.charAt(path.length()-1) == '/') ? path.substring(0, 
path.length()-1) : path;
         assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), path + 
QNAME_METADATA_NAMESPACE);
-        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "/files/file.txt");
+        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), name);
     }
 
     private void verifyHDFSEntity(AtlasEntity entity, boolean toLowerCase) {
@@ -173,4 +284,112 @@ public class AtlasPathExtractorUtilTest {
             assertEquals(entity.getAttribute(ATTRIBUTE_CLUSTER_NAME), 
METADATA_NAMESPACE);
         }
     }
+
+    private void verifyABFSAdlsGen2Dir(String abfsScheme, String path, 
AtlasEntity entity){
+        String pathQName = abfsScheme + 
"data@razrangersan/tmp/cdp-demo/sample.csv/" + QNAME_METADATA_NAMESPACE;
+        String entityQName = (String) 
entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME);
+
+        if (pathQName.equalsIgnoreCase(entityQName)){
+            assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "sample.csv");
+        } else {
+            pathQName = abfsScheme + "data@razrangersan/tmp/cdp-demo/" + 
QNAME_METADATA_NAMESPACE;
+            if (pathQName.equalsIgnoreCase(entityQName)){
+                assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "cdp-demo");
+            } else {
+                assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), 
abfsScheme + "data@razrangersan/tmp/" + QNAME_METADATA_NAMESPACE);
+                assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "tmp");
+            }
+        }
+    }
+
+    private void verifyABFSKnownEntities(String scheme, String path, 
Map<String, AtlasEntity> knownEntities) {
+        assertEquals(knownEntities.size(), 5);
+        int directoryCount = 0;
+        for (AtlasEntity knownEntity : knownEntities.values()) {
+            switch (knownEntity.getTypeName()){
+                case ADLS_GEN2_DIRECTORY:
+                    verifyABFSAdlsGen2Dir(scheme, path, knownEntity);
+                    directoryCount++;
+                    break;
+
+                case ADLS_GEN2_CONTAINER:
+                    
assertEquals(knownEntity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), scheme + 
"data@razrangersan" + QNAME_METADATA_NAMESPACE);
+                    assertEquals(knownEntity.getAttribute(ATTRIBUTE_NAME), 
"data");
+                    break;
+
+                case ADLS_GEN2_ACCOUNT:
+                    
assertEquals(knownEntity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), scheme + 
"razrangersan" + QNAME_METADATA_NAMESPACE);
+                    assertEquals(knownEntity.getAttribute(ATTRIBUTE_NAME), 
"razrangersan");
+                    break;
+            }
+        }
+        assertEquals(directoryCount, 3);
+    }
+
+    private void verifyS3V2PseudoDir(String s3Scheme, String path, AtlasEntity 
entity){
+        String pathQName = path + "/" + QNAME_METADATA_NAMESPACE;
+        String entityQName = (String) 
entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME);
+
+        if (pathQName.equalsIgnoreCase(entityQName)){
+            assertEquals(entity.getAttribute(ATTRIBUTE_NAME), 
"Irradiance_A.csv");
+            assertEquals(entity.getAttribute(ATTRIBUTE_OBJECT_PREFIX), 
"/1234567890/renders/Irradiance_A.csv/");
+        } else {
+            pathQName = s3Scheme + "aws_my_bucket1/1234567890/" + 
QNAME_METADATA_NAMESPACE;
+            if (pathQName.equalsIgnoreCase(entityQName)){
+                assertEquals(entity.getAttribute(ATTRIBUTE_NAME), 
"1234567890");
+                assertEquals(entity.getAttribute(ATTRIBUTE_OBJECT_PREFIX), 
"/1234567890/");
+            } else {
+                assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), 
s3Scheme + "aws_my_bucket1/1234567890/renders/" + QNAME_METADATA_NAMESPACE);
+                assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "renders");
+                assertEquals(entity.getAttribute(ATTRIBUTE_OBJECT_PREFIX), 
"/1234567890/renders/");
+            }
+        }
+    }
+
+    private void verifyS3V2KnownEntities(String scheme, String path, 
Map<String, AtlasEntity> knownEntities) {
+        assertEquals(knownEntities.size(), 4);
+        int dirCount = 0;
+        for (AtlasEntity knownEntity : knownEntities.values()) {
+            switch (knownEntity.getTypeName()){
+                case AWS_S3_V2_PSEUDO_DIR:
+                    verifyS3V2PseudoDir(scheme, path, knownEntity);
+                    dirCount++;
+                    break;
+
+                case AWS_S3_V2_BUCKET:
+                    verifyS3BucketEntity(scheme, knownEntity);
+                    break;
+            }
+        }
+        assertEquals(dirCount, 3);
+    }
+
+    private void verifyS3PseudoDir(String path, AtlasEntity entity){
+        assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), 
path.toLowerCase() + QNAME_METADATA_NAMESPACE);
+        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), 
"/1234567890/renders/irradiance_a.csv");
+        assertEquals(entity.getAttribute(ATTRIBUTE_OBJECT_PREFIX), 
"/1234567890/renders/irradiance_a.csv");
+    }
+
+    private void verifyS3KnownEntities(String scheme, String path, Map<String, 
AtlasEntity> knownEntities) {
+        assertEquals(knownEntities.size(), 2);
+        int dirCount = 0;
+        for (AtlasEntity knownEntity : knownEntities.values()) {
+            switch (knownEntity.getTypeName()){
+                case AWS_S3_PSEUDO_DIR:
+                    verifyS3PseudoDir(path, knownEntity);
+                    dirCount++;
+                    break;
+
+                case AWS_S3_BUCKET:
+                    verifyS3BucketEntity(scheme, knownEntity);
+                    break;
+            }
+        }
+        assertEquals(dirCount, 1);
+    }
+
+    private void verifyS3BucketEntity(String scheme, AtlasEntity entity) {
+        assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), scheme + 
"aws_my_bucket1" + QNAME_METADATA_NAMESPACE);
+        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "aws_my_bucket1");
+    }
 }

Reply via email to