Github user rdblue commented on a diff in the pull request:
https://github.com/apache/spark/pull/19269#discussion_r143524057
--- Diff:
sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleWritableDataSource.java
---
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package test.org.apache.spark.sql.sources.v2;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.catalyst.expressions.GenericRow;
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
+import org.apache.spark.sql.sources.v2.DataSourceV2;
+import org.apache.spark.sql.sources.v2.DataSourceV2Options;
+import org.apache.spark.sql.sources.v2.ReadSupport;
+import org.apache.spark.sql.sources.v2.WriteSupport;
+import org.apache.spark.sql.sources.v2.reader.DataReader;
+import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;
+import org.apache.spark.sql.sources.v2.reader.ReadTask;
+import org.apache.spark.sql.sources.v2.writer.*;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.StructType;
+
+public class JavaSimpleWritableDataSource implements DataSourceV2, ReadSupport, WriteSupport {
+ private StructType schema = new StructType().add("i", "long").add("j", "long");
+
+ class Reader implements DataSourceV2Reader {
+ private String path;
+
+ Reader(String path) {
+ this.path = path;
+ }
+
+ @Override
+ public StructType readSchema() {
+ return schema;
+ }
+
+ @Override
+ public List<ReadTask<Row>> createReadTasks() {
+ return java.util.Arrays.asList(new JavaSimpleCSVReadTask(path));
+ }
+ }
+
+ static class JavaSimpleCSVReadTask implements ReadTask<Row>, DataReader<Row> {
+ private String path;
+ private volatile Iterator<String> lines;
+ private volatile String currentLine;
+
+ JavaSimpleCSVReadTask(Iterator<String> lines) {
+ this.lines = lines;
+ }
+
+ JavaSimpleCSVReadTask(String path) {
+ this.path = path;
+ }
+
+ @Override
+ public DataReader<Row> createReader() {
+ assert path != null;
+ try {
+ if (Files.exists(Paths.get(path))) {
+ return new JavaSimpleCSVReadTask(Files.readAllLines(Paths.get(path)).iterator());
+ } else {
+ return new JavaSimpleCSVReadTask(Collections.emptyIterator());
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
--- End diff --
Does Spark have a `RuntimeIOException`? I typically prefer to wrap in one of
those rather than a plain `RuntimeException`, so that the underlying
IOException can still be caught and handled by code that chooses to do so.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]