This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new d3b98cb1b23 HADOOP-19194:Add test to find unshaded dependencies in the 
aws sdk (#6865)
d3b98cb1b23 is described below

commit d3b98cb1b23841a57b966c5cedab312687f098cb
Author: HarshitGupta11 <50410275+harshitgupt...@users.noreply.github.com>
AuthorDate: Mon Jun 24 15:11:11 2024 +0530

    HADOOP-19194:Add test to find unshaded dependencies in the aws sdk (#6865)
    
    
    The new test TestAWSV2SDK scans the AWS SDK bundle.jar and prints out all
    classes which are unshaded, and so at risk of creating classpath problems.
    
    The test does not fail when unshaded classes are found, because the
    current SDKs do ship with unshaded classes; the test would always fail.
    
    The SDK upgrade process should include inspecting the output
    of this test to see if it has got worse (do a before/after check).
    
    Once the AWS SDK does shade everything, we can have this
    test fail on any regression
    
    Contributed by Harshit Gupta
---
 .../src/site/markdown/tools/hadoop-aws/testing.md  |  1 +
 .../org/apache/hadoop/fs/sdk/TestAWSV2SDK.java     | 94 ++++++++++++++++++++++
 2 files changed, 95 insertions(+)

diff --git 
a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md 
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
index 45d1c847657..7222eee98ba 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
@@ -1184,6 +1184,7 @@ your IDE or via maven.
 1. Run a full AWS-test suite with S3 client-side encryption enabled by
  setting `fs.s3a.encryption.algorithm` to 'CSE-KMS' and setting up AWS-KMS
   Key ID in `fs.s3a.encryption.key`.
+2. Verify that the output of test `TestAWSV2SDK` doesn't contain any unshaded 
classes.
 
 The dependency chain of the `hadoop-aws` module should be similar to this, 
albeit
 with different version numbers:
diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java
 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java
new file mode 100644
index 00000000000..fca9fcc300c
--- /dev/null
+++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.sdk;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+import org.junit.Test;
+import org.apache.hadoop.test.AbstractHadoopTestBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Tests which look for packaging problems in the AWS V2 SDK bundle jar,
+ * such as classes which have not been shaded.
+ */
+public class TestAWSV2SDK extends AbstractHadoopTestBase {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestAWSV2SDK.class);
+
+  /**
+   * Fragment of a classpath entry which identifies the 2.x SDK bundle jar.
+   * Written with forward slashes; entries are normalized before matching.
+   */
+  private static final String SDK_BUNDLE_PATH_MARKER = "awssdk/bundle/2";
+
+  /** Path prefix, inside the jar, of classes in the SDK's own package. */
+  private static final String AWS_SDK_PREFIX = "software/amazon/awssdk";
+
+  /**
+   * Scan the AWS V2 SDK bundle jar found on the classpath and log every
+   * class whose path in the jar does not start with {@link #AWS_SDK_PREFIX}.
+   * <p>
+   * The only assertion is that the bundle jar is on the classpath:
+   * unshaded classes are logged at WARN but do not fail the test.
+   * @throws IOException failure to open or read the jar
+   */
+  @Test
+  public void testShadedClasses() throws IOException {
+    String allClassPath = System.getProperty("java.class.path");
+    LOG.debug("Current classpath:{}", allClassPath);
+    String v2ClassPath = null;
+    for (String classPath : allClassPath.split(File.pathSeparator)) {
+      // Only the version 2.x SDK is of interest. Separators are normalized
+      // so the match also works where the file separator is not '/'.
+      if (classPath.replace(File.separatorChar, '/').contains(SDK_BUNDLE_PATH_MARKER)) {
+        v2ClassPath = classPath;
+        break;
+      }
+    }
+    LOG.debug("AWS SDK V2 Classpath:{}", v2ClassPath);
+    assertThat(v2ClassPath)
+            .as("AWS V2 SDK should be present on the classpath").isNotNull();
+
+    // Anything outside the SDK's own package tree is reported as unshaded.
+    // NOTE(review): this includes any class entries under META-INF/
+    // (e.g. multi-release directories), if the jar has them -- confirm
+    // whether those should be excluded.
+    List<String> unshadedClasses = new ArrayList<>();
+    for (String awsSdkClass : getClassNamesFromJarFile(v2ClassPath)) {
+      if (!awsSdkClass.startsWith(AWS_SDK_PREFIX)) {
+        unshadedClasses.add(awsSdkClass);
+      }
+    }
+    if (!unshadedClasses.isEmpty()) {
+      LOG.warn("Unshaded Classes Found :{}", unshadedClasses.size());
+      LOG.warn("List of unshaded classes:{}", unshadedClasses);
+    } else {
+      LOG.info("No Unshaded classes found in the sdk.");
+    }
+  }
+
+  /**
+   * Returns the names of all entries in a jar file which end in ".class".
+   * Names are the entry paths inside the jar, using '/' as the separator
+   * and including the ".class" suffix.
+   * @param jarFilePath path to the jar file
+   * @return a list of the class entries contained in the jar file
+   * @throws IOException if the jar file cannot be opened or read
+   */
+  private static List<String> getClassNamesFromJarFile(String jarFilePath)
+      throws IOException {
+    List<String> classNames = new ArrayList<>();
+    // try-with-resources guarantees the jar is closed even if iteration fails
+    try (JarFile jarFile = new JarFile(new File(jarFilePath))) {
+      Enumeration<JarEntry> jarEntryEnumeration = jarFile.entries();
+      while (jarEntryEnumeration.hasMoreElements()) {
+        String entryName = jarEntryEnumeration.nextElement().getName();
+        if (entryName.endsWith(".class")) {
+          classNames.add(entryName);
+        }
+      }
+    }
+    return classNames;
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to