davisusanibar commented on PR #34227: URL: https://github.com/apache/arrow/pull/34227#issuecomment-1491097292
FYI, with the new version arrow-dataset-12.0.0-SNAPSHOT: https://github.com/ursacomputing/crossbow/releases/tag/actions-4043a22480-github-java-jars ``` - wget https://github.com/ursacomputing/crossbow/releases/download/actions-4043a22480-github-java-jars/arrow-dataset-12.0.0-SNAPSHOT.jar - mvn install:install-file "-Dfile=/Users/dsusanibar/Downloads/dataset/actions-4043a22480-github-java-jars/arrow-dataset-12.0.0-SNAPSHOT.jar" "-DgroupId=org.apache.arrow" "-DartifactId=arrow-dataset" "-Dversion=12.0.0-SNAPSHOT" "-Dpackaging=jar" ``` Querying the TPCH Nation table is working as expected: ``` import com.google.common.collect.ImmutableList; import io.substrait.isthmus.SqlToSubstrait; import io.substrait.proto.Plan; import org.apache.arrow.dataset.file.FileFormat; import org.apache.arrow.dataset.file.FileSystemDatasetFactory; import org.apache.arrow.dataset.jni.NativeMemoryPool; import org.apache.arrow.dataset.scanner.ScanOptions; import org.apache.arrow.dataset.scanner.Scanner; import org.apache.arrow.dataset.source.Dataset; import org.apache.arrow.dataset.source.DatasetFactory; import org.apache.arrow.dataset.substrait.AceroSubstraitConsumer; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.ipc.ArrowReader; import org.apache.calcite.sql.parser.SqlParseException; import java.nio.ByteBuffer; import java.util.HashMap; import java.util.Map; public class ClientSubstrait { public static void main(String[] args) { String uri = "file:///data/tpch_parquet/nation.parquet"; ScanOptions options = new ScanOptions(/*batchSize*/ 32768); try ( BufferAllocator allocator = new RootAllocator(); DatasetFactory datasetFactory = new FileSystemDatasetFactory(allocator, NativeMemoryPool.getDefault(), FileFormat.PARQUET, uri); Dataset dataset = datasetFactory.finish(); Scanner scanner = dataset.newScan(options); ArrowReader reader = scanner.scanBatches() ) { // map table to reader Map<String, ArrowReader> mapTableToArrowReader = 
new HashMap<>(); mapTableToArrowReader.put("NATION", reader); // get binary plan Plan plan = getPlan(); ByteBuffer substraitPlan = ByteBuffer.allocateDirect(plan.toByteArray().length); substraitPlan.put(plan.toByteArray()); // run query try (ArrowReader arrowReader = new AceroSubstraitConsumer(allocator).runQuery( substraitPlan, mapTableToArrowReader )) { while (arrowReader.loadNextBatch()) { System.out.println(arrowReader.getVectorSchemaRoot().contentToTSVString()); } } } catch (Exception e) { e.printStackTrace(); } } static Plan getPlan() throws SqlParseException { String sql = "SELECT * from nation"; String nation = "CREATE TABLE NATION (N_NATIONKEY BIGINT NOT NULL, N_NAME CHAR(25), " + "N_REGIONKEY BIGINT NOT NULL, N_COMMENT VARCHAR(152))"; SqlToSubstrait sqlToSubstrait = new SqlToSubstrait(); Plan plan = sqlToSubstrait.execute(sql, ImmutableList.of(nation)); return plan; } } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
