This is an automated email from the ASF dual-hosted git repository.
dweeks pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 9be7f00dd6 Fix ADLSLocation file parsing (#11395)
9be7f00dd6 is described below
commit 9be7f00dd6a9fb480a94c46d49473334908be859
Author: Marc Cenac <[email protected]>
AuthorDate: Tue Nov 5 17:55:56 2024 -0600
Fix ADLSLocation file parsing (#11395)
* Azure: Fix ADLSLocation file parsing
* Azure: Remove invalid test cases from ADLSLocationTest
* Update Javadocs with reference to ADLS URI
---
.../apache/iceberg/azure/adlsv2/ADLSLocation.java | 13 ++++++------
.../iceberg/azure/adlsv2/ADLSLocationTest.java | 24 ++++++----------------
2 files changed, 13 insertions(+), 24 deletions(-)
diff --git
a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
index e73093512b..5af590628f 100644
--- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
+++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
@@ -25,14 +25,16 @@ import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
/**
- * This class represents a fully qualified location in Azure expressed as a
URI.
+ * This class represents a fully qualified location to a file or directory in
Azure Data Lake
+ * Storage Gen2 storage.
*
- * <p>Locations follow the conventions used by Hadoop's Azure support, i.e.
+ * <p>Locations follow a URI like structure to identify resources
*
* <pre>{@code abfs[s]://[<container>@]<storage account host>/<file
path>}</pre>
*
- * <p>See <a
href="https://hadoop.apache.org/docs/stable/hadoop-azure/abfs.html">Hadoop Azure
- * Support</a>
+ * <p>See <a
+ *
href="https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri#uri-syntax">Azure
+ * Data Lake Storage URI</a>
*/
class ADLSLocation {
private static final Pattern URI_PATTERN =
Pattern.compile("^abfss?://([^/?#]+)(.*)?$");
@@ -64,8 +66,7 @@ class ADLSLocation {
}
String uriPath = matcher.group(2);
- uriPath = uriPath == null ? "" : uriPath.startsWith("/") ?
uriPath.substring(1) : uriPath;
- this.path = uriPath.split("\\?", -1)[0].split("#", -1)[0];
+ this.path = uriPath == null ? "" : uriPath.startsWith("/") ?
uriPath.substring(1) : uriPath;
}
/** Returns Azure storage account. */
diff --git
a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
index 867b54b4c7..403886f4b2 100644
--- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
+++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
@@ -82,23 +82,11 @@ public class ADLSLocationTest {
assertThat(location.path()).isEqualTo("");
}
- @Test
- public void testQueryAndFragment() {
- String p1 =
"abfs://container@account.dfs.core.windows.net/path/to/file?query=foo#123";
- ADLSLocation location = new ADLSLocation(p1);
-
-
assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
- assertThat(location.container().get()).isEqualTo("container");
- assertThat(location.path()).isEqualTo("path/to/file");
- }
-
- @Test
- public void testQueryAndFragmentNoPath() {
- String p1 = "abfs://container@account.dfs.core.windows.net?query=foo#123";
- ADLSLocation location = new ADLSLocation(p1);
-
-
assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
- assertThat(location.container().get()).isEqualTo("container");
- assertThat(location.path()).isEqualTo("");
+ @ParameterizedTest
+ @ValueSource(strings = {"file?.txt", "file%3F.txt"})
+ public void testQuestionMarkInFileName(String path) {
+ String fullPath =
String.format("abfs://container@account.dfs.core.windows.net/%s", path);
+ ADLSLocation location = new ADLSLocation(fullPath);
+ assertThat(location.path()).contains(path);
}
}