This is an automated email from the ASF dual-hosted git repository.
weibin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new b33c1f0 chore: Add more reference code link to datasource (#472)
b33c1f0 is described below
commit b33c1f0f36246fa45761fb5d122f869d18432a7e
Author: Weibin Zeng <[email protected]>
AuthorDate: Mon May 6 19:12:01 2024 +0800
chore: Add more reference code link to datasource (#472)
---
.../scala/org/apache/graphar/datasources/GarCommitProtocol.scala | 4 ++++
.../main/scala/org/apache/graphar/datasources/GarDataSource.scala | 3 ++-
.../src/main/scala/org/apache/graphar/datasources/GarScan.scala | 3 +++
.../main/scala/org/apache/graphar/datasources/GarScanBuilder.scala | 3 +++
.../src/main/scala/org/apache/graphar/datasources/GarTable.scala | 3 +++
.../scala/org/apache/graphar/datasources/GarWriterBuilder.scala | 5 +++--
.../scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala | 5 +++--
.../scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala | 6 ++++--
.../scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala | 5 +++--
.../apache/graphar/datasources/parquet/ParquetWriterBuilder.scala | 6 ++++--
.../scala/org/apache/graphar/datasources/GarCommitProtocol.scala | 4 ++++
.../main/scala/org/apache/graphar/datasources/GarDataSource.scala | 3 ++-
.../src/main/scala/org/apache/graphar/datasources/GarScan.scala | 3 +++
.../main/scala/org/apache/graphar/datasources/GarScanBuilder.scala | 3 +++
.../src/main/scala/org/apache/graphar/datasources/GarTable.scala | 3 +++
.../scala/org/apache/graphar/datasources/GarWriterBuilder.scala | 5 +++--
.../scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala | 5 +++--
.../scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala | 6 ++++--
.../scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala | 5 +++--
.../apache/graphar/datasources/parquet/ParquetWriterBuilder.scala | 5 +++--
20 files changed, 63 insertions(+), 22 deletions(-)
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
index 2ee5d26..07cff02 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
+
package org.apache.graphar.datasources
import org.apache.graphar.GeneralParams
@@ -61,6 +64,7 @@ class GarCommitProtocol(
with Serializable
with Logging {
+ // override getFilename to customize the file name
override def getFilename(
taskContext: TaskAttemptContext,
ext: String
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
index 958a7c5..38a3c18 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
@@ -35,7 +35,8 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.connector.expressions.Transform
-object GarUtils
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala
/**
* GarDataSource is a class to provide gar files as the data source for spark.
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScan.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScan.scala
index ca3e6f1..4b063db 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScan.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScan.scala
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
+
package org.apache.graphar.datasources
import scala.collection.JavaConverters._
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
index 7ca7e06..1e83c77 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala
+
package org.apache.graphar.datasources
import org.apache.spark.sql.SparkSession
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarTable.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarTable.scala
index 9bda768..8aa2317 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarTable.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarTable.scala
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
+
package org.apache.graphar.datasources
import scala.collection.JavaConverters._
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
index 86c5f6e..3acd924 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
@@ -12,10 +12,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of GarWriteBuilder is referred from FileWriteBuilder of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
+
package org.apache.graphar.datasources
import java.util.UUID
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
index a19bd80..c0a38d5 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
@@ -12,10 +12,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of CSVWriteBuilder is refered from CSVWriteBuilder of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWriteBuilder.scala
+
package org.apache.graphar.datasources.csv
import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
index cb642e1..c1d2ff8 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
@@ -12,10 +12,12 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of OrcOutputWriter is referred from OrcOutputWriter of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1, since the OrcOutputWriter is private in the original source,
+// we have to reimplement it here.
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOutputWriter.scala
+
package org.apache.graphar.datasources.orc
import org.apache.hadoop.fs.Path
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
index 8aee0c7..9bdf796 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
@@ -12,10 +12,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of OrcWriteBuilder is referred from OrcWriteBuilder of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/ORCWriteBuilder.scala
+
package org.apache.graphar.datasources.orc
import org.apache.hadoop.mapred.JobConf
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
index 4befb04..8d7fece 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
@@ -12,10 +12,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of ParquetWriteBuilder is referred from ParquetWriteBuilder of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWriteBuilder.scala
+
package org.apache.graphar.datasources.parquet
import org.apache.hadoop.mapreduce.{Job, OutputCommitter, TaskAttemptContext}
@@ -24,6 +25,7 @@ import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel
import org.apache.parquet.hadoop.codec.CodecConfig
import org.apache.parquet.hadoop.util.ContextUtil
+import org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter
import org.apache.spark.internal.Logging
import org.apache.spark.sql.Row
import org.apache.spark.sql.connector.write.LogicalWriteInfo
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
index 4b19373..8be2e23 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Derived from Apache Spark 3.3.4
+// https://github.com/apache/spark/blob/18db204/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
+
package org.apache.graphar.datasources
import org.apache.graphar.GeneralParams
@@ -62,6 +65,7 @@ class GarCommitProtocol(
with Serializable
with Logging {
+ // override getFilename to customize the file name
override def getFilename(
taskContext: TaskAttemptContext,
spec: FileNameSpec
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
index 958a7c5..38a3c18 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
@@ -35,7 +35,8 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.connector.expressions.Transform
-object GarUtils
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala
/**
* GarDataSource is a class to provide gar files as the data source for spark.
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScan.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScan.scala
index 1c5c2a4..bf4995b 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScan.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScan.scala
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Derived from Apache Spark 3.3.4
+// https://github.com/apache/spark/blob/18db204/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
+
package org.apache.graphar.datasources
import scala.collection.JavaConverters._
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
index 4968b9d..85f43e5 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Derived from Apache Spark 3.3.4
+// https://github.com/apache/spark/blob/18db204/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala
+
package org.apache.graphar.datasources
import org.apache.spark.sql.SparkSession
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarTable.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarTable.scala
index 9bda768..8aa2317 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarTable.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarTable.scala
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
+
package org.apache.graphar.datasources
import scala.collection.JavaConverters._
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
index 5cba5c0..8363ae2 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
@@ -12,10 +12,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of GarWriteBuilder is referred from FileWriteBuilder of spark 3.1.1
*/
+// Derived from Apache Spark 3.3.4
+// https://github.com/apache/spark/blob/18db204/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
+
package org.apache.graphar.datasources
import java.util.UUID
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
index a19bd80..c0a38d5 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
@@ -12,10 +12,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of CSVWriteBuilder is refered from CSVWriteBuilder of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWriteBuilder.scala
+
package org.apache.graphar.datasources.csv
import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
index cb642e1..c1d2ff8 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
@@ -12,10 +12,12 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of OrcOutputWriter is referred from OrcOutputWriter of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1, since the OrcOutputWriter is private in the original source,
+// we have to reimplement it here.
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOutputWriter.scala
+
package org.apache.graphar.datasources.orc
import org.apache.hadoop.fs.Path
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
index 8aee0c7..9bdf796 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
@@ -12,10 +12,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of OrcWriteBuilder is referred from OrcWriteBuilder of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/ORCWriteBuilder.scala
+
package org.apache.graphar.datasources.orc
import org.apache.hadoop.mapred.JobConf
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
index 26e17ad..5c92204 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
@@ -12,10 +12,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * The implementation of ParquetWriteBuilder is referred from ParquetWriteBuilder of spark 3.1.1
*/
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWriteBuilder.scala
+
package org.apache.graphar.datasources.parquet
import org.apache.hadoop.mapreduce.{Job, OutputCommitter, TaskAttemptContext}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]