This is an automated email from the ASF dual-hosted git repository.
arjun4084346 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new eb9718436 add dataset root in some common type of datasets
new cea7cda17 Merge pull request #3802 from arjun4084346/datasetRoot
eb9718436 is described below
commit eb971843600596703de4440492156349c1932155
Author: Arjun <[email protected]>
AuthorDate: Wed Oct 18 00:14:04 2023 -0700
add dataset root in some common type of datasets
---
.../gobblin/data/management/copy/RecursiveCopyableDataset.java | 5 +++++
.../gobblin/data/management/copy/iceberg/IcebergDataset.java | 9 +++++++++
.../gobblin/data/management/copy/iceberg/IcebergTable.java | 6 ++----
3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
index 1a7e0d069..6b181011a 100644
--- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
+++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
@@ -237,4 +237,9 @@ public class RecursiveCopyableDataset implements CopyableDataset, FileSystemData
return fileInTarget.getLen() == fileInSource.getLen() &&
fileInSource.getModificationTime() <= fileInTarget
.getModificationTime();
}
+
+ @Override
+ public String getDatasetPath() {
+ return Path.getPathWithoutSchemeAndAuthority(this.rootPath).toString();
+ }
}
diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergDataset.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergDataset.java
index 05f1e265d..a59fc3688 100644
--- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergDataset.java
+++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergDataset.java
@@ -89,6 +89,15 @@ public class IcebergDataset implements PrioritizedCopyableDataset {
return this.getFileSetId();
}
+ @Override
+ public String getDatasetPath() {
+ try {
+ return this.destIcebergTable.accessTableMetadata().location();
+ } catch (IcebergTable.TableNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
/**
* Finds all files read by the table and generates CopyableFiles.
* For the specific semantics see {@link #createFileSets}.
diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergTable.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergTable.java
index 529f53a45..4fe1840fd 100644
--- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergTable.java
+++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergTable.java
@@ -178,11 +178,8 @@ public class IcebergTable {
}
protected static List<String> discoverDataFilePaths(ManifestFile manifest,
FileIO io) throws IOException {
- CloseableIterable<String> manifestPathsIterable = ManifestFiles.readPaths(manifest, io);
- try {
+ try (CloseableIterable<String> manifestPathsIterable = ManifestFiles.readPaths(manifest, io)) {
return Lists.newArrayList(manifestPathsIterable);
- } finally {
- manifestPathsIterable.close();
}
}
protected DatasetDescriptor getDatasetDescriptor(FileSystem fs) {
@@ -194,6 +191,7 @@ public class IcebergTable {
descriptor.addMetadata(DatasetConstants.FS_URI, fs.getUri().toString());
return descriptor;
}
+
/** Registers {@link IcebergTable} after publishing data.
* @param dstMetadata is null if destination {@link IcebergTable} is absent,
in which case registration is skipped */
protected void registerIcebergTable(TableMetadata srcMetadata, TableMetadata
dstMetadata) {