From Peeyush Gupta <[email protected]>:

Peeyush Gupta has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19047 )


Change subject: wip: use delta kernel api instead of delta standalone
......................................................................

wip: use delta kernel api instead of delta standalone

Change-Id: I0017e63ac0bddcfa0b342d9380d55934a76c12ec
---
M asterixdb/asterix-external-data/pom.xml
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/delta/AwsS3DeltaReaderFactory.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
M asterixdb/pom.xml
M hyracks-fullstack/pom.xml
5 files changed, 46 insertions(+), 12 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/47/19047/1

diff --git a/asterixdb/asterix-external-data/pom.xml 
b/asterixdb/asterix-external-data/pom.xml
index 8c8ad10..a388054 100644
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@ -579,6 +579,16 @@
     </dependency>
     <dependency>
       <groupId>io.delta</groupId>
+      <artifactId>delta-kernel-api</artifactId>
+      <version>3.2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>io.delta</groupId>
+      <artifactId>delta-kernel-defaults</artifactId>
+      <version>3.2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>io.delta</groupId>
       <artifactId>delta-standalone_2.12</artifactId>
       <version>3.0.0</version>
     </dependency>
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/delta/AwsS3DeltaReaderFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/delta/AwsS3DeltaReaderFactory.java
index f5a2cd5..a0d1e49 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/delta/AwsS3DeltaReaderFactory.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/delta/AwsS3DeltaReaderFactory.java
@@ -47,6 +47,9 @@
         Configuration conf = new Configuration();
         conf.set(S3Constants.HADOOP_ACCESS_KEY_ID, 
configuration.get(S3Constants.ACCESS_KEY_ID_FIELD_NAME));
         conf.set(S3Constants.HADOOP_SECRET_ACCESS_KEY, 
configuration.get(S3Constants.SECRET_ACCESS_KEY_FIELD_NAME));
+        if (configuration.containsKey(S3Constants.SESSION_TOKEN_FIELD_NAME)) {
+            conf.set(S3Constants.HADOOP_SESSION_TOKEN, 
configuration.get(S3Constants.SESSION_TOKEN_FIELD_NAME));
+        }
         conf.set(S3Constants.HADOOP_REGION, 
configuration.get(S3Constants.REGION_FIELD_NAME));
         String serviceEndpoint = 
configuration.get(SERVICE_END_POINT_FIELD_NAME);
         if (serviceEndpoint != null) {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index c362f75..732f3c5 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -109,9 +109,13 @@
 import org.apache.iceberg.hadoop.HadoopTables;
 import org.apache.iceberg.io.CloseableIterable;

-import io.delta.standalone.DeltaLog;
-import io.delta.standalone.Snapshot;
-import io.delta.standalone.actions.AddFile;
+import io.delta.kernel.Scan;
+import io.delta.kernel.Snapshot;
+import io.delta.kernel.data.FilteredColumnarBatch;
+import io.delta.kernel.data.Row;
+import io.delta.kernel.defaults.engine.DefaultEngine;
+import io.delta.kernel.engine.Engine;
+import io.delta.kernel.utils.CloseableIterator;

 public class ExternalDataUtils {
     private static final Map<ATypeTag, IValueParserFactory> 
valueParserFactoryMap = new EnumMap<>(ATypeTag.class);
@@ -503,14 +507,22 @@

     public static void prepareDeltaTableFormat(Map<String, String> 
configuration, Configuration conf,
             String tableMetadataPath) {
-        DeltaLog deltaLog = DeltaLog.forTable(conf, tableMetadataPath);
-        Snapshot snapshot = deltaLog.snapshot();
-        List<AddFile> dataFiles = snapshot.getAllFiles();
+        Engine engine = DefaultEngine.create(conf);
+        io.delta.kernel.Table table = io.delta.kernel.Table.forPath(engine, 
tableMetadataPath);
+        Snapshot snapshot = table.getLatestSnapshot(engine);
+        Scan scan = snapshot.getScanBuilder(engine).withReadSchema(engine, 
snapshot.getSchema(engine)).build();
+        CloseableIterator<FilteredColumnarBatch> iter = 
scan.getScanFiles(engine);
         StringBuilder builder = new StringBuilder();
-        for (AddFile batchFile : dataFiles) {
-            builder.append(",");
-            String path = batchFile.getPath();
-            builder.append(tableMetadataPath).append('/').append(path);
+        while (iter.hasNext()) {
+            FilteredColumnarBatch batch = iter.next();
+            CloseableIterator<Row> rowIter = batch.getRows();
+            while (rowIter.hasNext()) {
+                Row row = rowIter.next();
+                Row addFile = row.getStruct(0);
+                builder.append(",");
+                String path = addFile.getString(0);
+                builder.append(tableMetadataPath).append('/').append(path);
+            }
         }
         if (builder.length() > 0) {
             builder.deleteCharAt(0);
diff --git a/asterixdb/pom.xml b/asterixdb/pom.xml
index 94bd6e1..e8b27c9 100644
--- a/asterixdb/pom.xml
+++ b/asterixdb/pom.xml
@@ -91,7 +91,7 @@
     <!-- Versions under dependencymanagement or used in many projects via 
properties -->
     <algebricks.version>0.3.10-SNAPSHOT</algebricks.version>
     <hyracks.version>0.3.10-SNAPSHOT</hyracks.version>
-    <hadoop.version>3.3.6</hadoop.version>
+    <hadoop.version>3.3.4</hadoop.version>
     <jacoco.version>0.7.6.201602180812</jacoco.version>
     <log4j.version>2.22.1</log4j.version>
     <awsjavasdk.version>2.24.9</awsjavasdk.version>
diff --git a/hyracks-fullstack/pom.xml b/hyracks-fullstack/pom.xml
index 20c3e21..473f779d 100644
--- a/hyracks-fullstack/pom.xml
+++ b/hyracks-fullstack/pom.xml
@@ -69,7 +69,7 @@
     <test.includes>${global.test.includes}</test.includes>
     <test.excludes>${global.test.excludes}</test.excludes>
     <!-- Versions under dependencymanagement or used in many projects via 
properties -->
-    <hadoop.version>3.3.6</hadoop.version>
+    <hadoop.version>3.3.4</hadoop.version>
     <jacoco.version>0.7.6.201602180812</jacoco.version>
     <log4j.version>2.22.1</log4j.version>
     <snappy.version>1.1.10.5</snappy.version>

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19047
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I0017e63ac0bddcfa0b342d9380d55934a76c12ec
Gerrit-Change-Number: 19047
Gerrit-PatchSet: 1
Gerrit-Owner: Peeyush Gupta <[email protected]>
Gerrit-MessageType: newchange

Reply via email to