This is an automated email from the ASF dual-hosted git repository.

dweeks pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 9be7f00dd6 Fix ADLSLocation file parsing (#11395)
9be7f00dd6 is described below

commit 9be7f00dd6a9fb480a94c46d49473334908be859
Author: Marc Cenac <[email protected]>
AuthorDate: Tue Nov 5 17:55:56 2024 -0600

    Fix ADLSLocation file parsing (#11395)
    
    * Azure: Fix ADLSLocation file parsing
    
    * Azure: Remove invalid test cases from ADLSLocationTest
    
    * Update Javadocs with reference to ADLS URI
---
 .../apache/iceberg/azure/adlsv2/ADLSLocation.java  | 13 ++++++------
 .../iceberg/azure/adlsv2/ADLSLocationTest.java     | 24 ++++++----------------
 2 files changed, 13 insertions(+), 24 deletions(-)

diff --git 
a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java 
b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
index e73093512b..5af590628f 100644
--- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
+++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
@@ -25,14 +25,16 @@ import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 
 /**
- * This class represents a fully qualified location in Azure expressed as a 
URI.
+ * This class represents a fully qualified location to a file or directory in 
Azure Data Lake
+ * Storage Gen2 storage.
  *
- * <p>Locations follow the conventions used by Hadoop's Azure support, i.e.
+ * <p>Locations follow a URI like structure to identify resources
  *
  * <pre>{@code abfs[s]://[<container>@]<storage account host>/<file 
path>}</pre>
  *
- * <p>See <a 
href="https://hadoop.apache.org/docs/stable/hadoop-azure/abfs.html">Hadoop Azure
- * Support</a>
+ * <p>See <a
+ * 
href="https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri#uri-syntax">Azure
+ * Data Lake Storage URI</a>
  */
 class ADLSLocation {
   private static final Pattern URI_PATTERN = 
Pattern.compile("^abfss?://([^/?#]+)(.*)?$");
@@ -64,8 +66,7 @@ class ADLSLocation {
     }
 
     String uriPath = matcher.group(2);
-    uriPath = uriPath == null ? "" : uriPath.startsWith("/") ? 
uriPath.substring(1) : uriPath;
-    this.path = uriPath.split("\\?", -1)[0].split("#", -1)[0];
+    this.path = uriPath == null ? "" : uriPath.startsWith("/") ? 
uriPath.substring(1) : uriPath;
   }
 
   /** Returns Azure storage account. */
diff --git 
a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java 
b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
index 867b54b4c7..403886f4b2 100644
--- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
+++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
@@ -82,23 +82,11 @@ public class ADLSLocationTest {
     assertThat(location.path()).isEqualTo("");
   }
 
-  @Test
-  public void testQueryAndFragment() {
-    String p1 = 
"abfs://container@account.dfs.core.windows.net/path/to/file?query=foo#123";
-    ADLSLocation location = new ADLSLocation(p1);
-
-    
assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
-    assertThat(location.container().get()).isEqualTo("container");
-    assertThat(location.path()).isEqualTo("path/to/file");
-  }
-
-  @Test
-  public void testQueryAndFragmentNoPath() {
-    String p1 = "abfs://container@account.dfs.core.windows.net?query=foo#123";
-    ADLSLocation location = new ADLSLocation(p1);
-
-    
assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
-    assertThat(location.container().get()).isEqualTo("container");
-    assertThat(location.path()).isEqualTo("");
+  @ParameterizedTest
+  @ValueSource(strings = {"file?.txt", "file%3F.txt"})
+  public void testQuestionMarkInFileName(String path) {
+    String fullPath = 
String.format("abfs://container@account.dfs.core.windows.net/%s", path);
+    ADLSLocation location = new ADLSLocation(fullPath);
+    assertThat(location.path()).contains(path);
   }
 }

Reply via email to