davisusanibar commented on PR #34227:
URL: https://github.com/apache/arrow/pull/34227#issuecomment-1491097292

   FYI,
   
   With the new arrow-dataset-12.0.0-SNAPSHOT version from https://github.com/ursacomputing/crossbow/releases/tag/actions-4043a22480-github-java-jars:
   ```
   - wget https://github.com/ursacomputing/crossbow/releases/download/actions-4043a22480-github-java-jars/arrow-dataset-12.0.0-SNAPSHOT.jar
   - mvn install:install-file "-Dfile=/Users/dsusanibar/Downloads/dataset/actions-4043a22480-github-java-jars/arrow-dataset-12.0.0-SNAPSHOT.jar" "-DgroupId=org.apache.arrow" "-DartifactId=arrow-dataset" "-Dversion=12.0.0-SNAPSHOT" "-Dpackaging=jar"
   ```
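   
   If the install succeeds, the jar should land in the local Maven repository under the coordinates above. As a quick sanity check (a minimal sketch, assuming the default `~/.m2/repository` location):
   ```
   - ls ~/.m2/repository/org/apache/arrow/arrow-dataset/12.0.0-SNAPSHOT/
   ```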
   
   Querying the TPC-H Nation table works as expected:
   
   ```
   import com.google.common.collect.ImmutableList;
   import io.substrait.isthmus.SqlToSubstrait;
   import io.substrait.proto.Plan;
   import org.apache.arrow.dataset.file.FileFormat;
   import org.apache.arrow.dataset.file.FileSystemDatasetFactory;
   import org.apache.arrow.dataset.jni.NativeMemoryPool;
   import org.apache.arrow.dataset.scanner.ScanOptions;
   import org.apache.arrow.dataset.scanner.Scanner;
   import org.apache.arrow.dataset.source.Dataset;
   import org.apache.arrow.dataset.source.DatasetFactory;
   import org.apache.arrow.dataset.substrait.AceroSubstraitConsumer;
   import org.apache.arrow.memory.BufferAllocator;
   import org.apache.arrow.memory.RootAllocator;
   import org.apache.arrow.vector.ipc.ArrowReader;
   import org.apache.calcite.sql.parser.SqlParseException;
   
   import java.nio.ByteBuffer;
   import java.util.HashMap;
   import java.util.Map;
   
   public class ClientSubstrait {
       public static void main(String[] args) {
           String uri = "file:///data/tpch_parquet/nation.parquet";
           ScanOptions options = new ScanOptions(/*batchSize*/ 32768);
           try (
                   BufferAllocator allocator = new RootAllocator();
                    DatasetFactory datasetFactory = new FileSystemDatasetFactory(allocator, NativeMemoryPool.getDefault(),
                           FileFormat.PARQUET, uri);
                   Dataset dataset = datasetFactory.finish();
                   Scanner scanner = dataset.newScan(options);
                   ArrowReader reader = scanner.scanBatches()
           ) {
               // map table to reader
               Map<String, ArrowReader> mapTableToArrowReader = new HashMap<>();
               mapTableToArrowReader.put("NATION", reader);
               // get binary plan
               Plan plan = getPlan();
                ByteBuffer substraitPlan = ByteBuffer.allocateDirect(plan.toByteArray().length);
               substraitPlan.put(plan.toByteArray());
               // run query
                try (ArrowReader arrowReader = new AceroSubstraitConsumer(allocator).runQuery(
                       substraitPlan,
                       mapTableToArrowReader
               )) {
                    while (arrowReader.loadNextBatch()) {
                        System.out.println(arrowReader.getVectorSchemaRoot().contentToTSVString());
                   }
               }
           } catch (Exception e) {
               e.printStackTrace();
           }
       }
   
       static Plan getPlan() throws SqlParseException {
           String sql = "SELECT * from nation";
           String nation = "CREATE TABLE NATION (N_NATIONKEY BIGINT NOT NULL, 
N_NAME CHAR(25), " +
                   "N_REGIONKEY BIGINT NOT NULL, N_COMMENT VARCHAR(152))";
           SqlToSubstrait sqlToSubstrait = new SqlToSubstrait();
           Plan plan = sqlToSubstrait.execute(sql, ImmutableList.of(nation));
           return plan;
       }
   }
   ```
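   
   Note that the key used in `mapTableToArrowReader` ("NATION") must match the table name referenced by the Substrait plan that Isthmus generates from the SQL and the CREATE TABLE statement. As a minimal sketch of a variation (untested; `getFilteredPlan` is just an illustrative name, and it assumes Acero's Substrait consumer handles the filter/projection Isthmus emits for this query):
   ```
       static Plan getFilteredPlan() throws SqlParseException {
           // Same CREATE TABLE definition as above; only the query adds a projection and a filter.
           String sql = "SELECT N_NAME, N_REGIONKEY FROM NATION WHERE N_REGIONKEY > 2";
           String nation = "CREATE TABLE NATION (N_NATIONKEY BIGINT NOT NULL, N_NAME CHAR(25), " +
                   "N_REGIONKEY BIGINT NOT NULL, N_COMMENT VARCHAR(152))";
           return new SqlToSubstrait().execute(sql, ImmutableList.of(nation));
       }
   ```
   The resulting plan would be serialized into a direct ByteBuffer and passed to `runQuery` exactly as in `main` above.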
   

