This is an automated email from the ASF dual-hosted git repository.

weibin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new b33c1f0  chore: Add more reference code link to datasource (#472)
b33c1f0 is described below

commit b33c1f0f36246fa45761fb5d122f869d18432a7e
Author: Weibin Zeng <[email protected]>
AuthorDate: Mon May 6 19:12:01 2024 +0800

    chore: Add more reference code link to datasource (#472)
---
 .../scala/org/apache/graphar/datasources/GarCommitProtocol.scala    | 4 ++++
 .../main/scala/org/apache/graphar/datasources/GarDataSource.scala   | 3 ++-
 .../src/main/scala/org/apache/graphar/datasources/GarScan.scala     | 3 +++
 .../main/scala/org/apache/graphar/datasources/GarScanBuilder.scala  | 3 +++
 .../src/main/scala/org/apache/graphar/datasources/GarTable.scala    | 3 +++
 .../scala/org/apache/graphar/datasources/GarWriterBuilder.scala     | 5 +++--
 .../scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala | 5 +++--
 .../scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala  | 6 ++++--
 .../scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala  | 5 +++--
 .../apache/graphar/datasources/parquet/ParquetWriterBuilder.scala   | 6 ++++--
 .../scala/org/apache/graphar/datasources/GarCommitProtocol.scala    | 4 ++++
 .../main/scala/org/apache/graphar/datasources/GarDataSource.scala   | 3 ++-
 .../src/main/scala/org/apache/graphar/datasources/GarScan.scala     | 3 +++
 .../main/scala/org/apache/graphar/datasources/GarScanBuilder.scala  | 3 +++
 .../src/main/scala/org/apache/graphar/datasources/GarTable.scala    | 3 +++
 .../scala/org/apache/graphar/datasources/GarWriterBuilder.scala     | 5 +++--
 .../scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala | 5 +++--
 .../scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala  | 6 ++++--
 .../scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala  | 5 +++--
 .../apache/graphar/datasources/parquet/ParquetWriterBuilder.scala   | 5 +++--
 20 files changed, 63 insertions(+), 22 deletions(-)

diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
index 2ee5d26..07cff02 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
+
 package org.apache.graphar.datasources
 
 import org.apache.graphar.GeneralParams
@@ -61,6 +64,7 @@ class GarCommitProtocol(
     with Serializable
     with Logging {
 
+  // override getFilename to customize the file name
   override def getFilename(
       taskContext: TaskAttemptContext,
       ext: String
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
index 958a7c5..38a3c18 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
@@ -35,7 +35,8 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.sql.sources.DataSourceRegister
 import org.apache.spark.sql.connector.expressions.Transform
 
-object GarUtils
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala
 
 /**
  * GarDataSource is a class to provide gar files as the data source for spark.
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScan.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScan.scala
index ca3e6f1..4b063db 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScan.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScan.scala
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
+
 package org.apache.graphar.datasources
 
 import scala.collection.JavaConverters._
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
index 7ca7e06..1e83c77 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala
+
 package org.apache.graphar.datasources
 
 import org.apache.spark.sql.SparkSession
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarTable.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarTable.scala
index 9bda768..8aa2317 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarTable.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarTable.scala
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
+
 package org.apache.graphar.datasources
 
 import scala.collection.JavaConverters._
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
index 86c5f6e..3acd924 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
@@ -12,10 +12,11 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of GarWriteBuilder is referred from FileWriteBuilder of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
+
 package org.apache.graphar.datasources
 
 import java.util.UUID
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
index a19bd80..c0a38d5 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
@@ -12,10 +12,11 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of CSVWriteBuilder is refered from CSVWriteBuilder of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWriteBuilder.scala
+
 package org.apache.graphar.datasources.csv
 
 import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
index cb642e1..c1d2ff8 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
@@ -12,10 +12,12 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of OrcOutputWriter is referred from OrcOutputWriter of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1, since the OrcOutputWriter is private in the original source,
+// we have to reimplement it here.
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOutputWriter.scala
+
 package org.apache.graphar.datasources.orc
 
 import org.apache.hadoop.fs.Path
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
index 8aee0c7..9bdf796 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
@@ -12,10 +12,11 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of OrcWriteBuilder is referred from OrcWriteBuilder of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/ORCWriteBuilder.scala
+
 package org.apache.graphar.datasources.orc
 
 import org.apache.hadoop.mapred.JobConf
diff --git a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
index 4befb04..8d7fece 100644
--- a/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
+++ b/spark/datasources-32/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
@@ -12,10 +12,11 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of ParquetWriteBuilder is referred from ParquetWriteBuilder of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWriteBuilder.scala
+
 package org.apache.graphar.datasources.parquet
 
 import org.apache.hadoop.mapreduce.{Job, OutputCommitter, TaskAttemptContext}
@@ -24,6 +25,7 @@ import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel
 import org.apache.parquet.hadoop.codec.CodecConfig
 import org.apache.parquet.hadoop.util.ContextUtil
 
+import org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.connector.write.LogicalWriteInfo
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
index 4b19373..8be2e23 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarCommitProtocol.scala
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Derived from Apache Spark 3.3.4
+// https://github.com/apache/spark/blob/18db204/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
+
 package org.apache.graphar.datasources
 
 import org.apache.graphar.GeneralParams
@@ -62,6 +65,7 @@ class GarCommitProtocol(
     with Serializable
     with Logging {
 
+  // override getFilename to customize the file name
   override def getFilename(
       taskContext: TaskAttemptContext,
       spec: FileNameSpec
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
index 958a7c5..38a3c18 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarDataSource.scala
@@ -35,7 +35,8 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.sql.sources.DataSourceRegister
 import org.apache.spark.sql.connector.expressions.Transform
 
-object GarUtils
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala
 
 /**
  * GarDataSource is a class to provide gar files as the data source for spark.
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScan.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScan.scala
index 1c5c2a4..bf4995b 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScan.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScan.scala
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Derived from Apache Spark 3.3.4
+// https://github.com/apache/spark/blob/18db204/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
+
 package org.apache.graphar.datasources
 
 import scala.collection.JavaConverters._
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
index 4968b9d..85f43e5 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarScanBuilder.scala
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Derived from Apache Spark 3.3.4
+// https://github.com/apache/spark/blob/18db204/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala
+
 package org.apache.graphar.datasources
 
 import org.apache.spark.sql.SparkSession
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarTable.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarTable.scala
index 9bda768..8aa2317 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarTable.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarTable.scala
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
+
 package org.apache.graphar.datasources
 
 import scala.collection.JavaConverters._
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
index 5cba5c0..8363ae2 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/GarWriterBuilder.scala
@@ -12,10 +12,11 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of GarWriteBuilder is referred from FileWriteBuilder of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.3.4
+// https://github.com/apache/spark/blob/18db204/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
+
 package org.apache.graphar.datasources
 
 import java.util.UUID
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
index a19bd80..c0a38d5 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/csv/CSVWriterBuilder.scala
@@ -12,10 +12,11 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of CSVWriteBuilder is refered from CSVWriteBuilder of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWriteBuilder.scala
+
 package org.apache.graphar.datasources.csv
 
 import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
index cb642e1..c1d2ff8 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcOutputWriter.scala
@@ -12,10 +12,12 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of OrcOutputWriter is referred from OrcOutputWriter of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1, since the OrcOutputWriter is private in the original source,
+// we have to reimplement it here.
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOutputWriter.scala
+
 package org.apache.graphar.datasources.orc
 
 import org.apache.hadoop.fs.Path
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
index 8aee0c7..9bdf796 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/orc/OrcWriteBuilder.scala
@@ -12,10 +12,11 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of OrcWriteBuilder is referred from OrcWriteBuilder of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/ORCWriteBuilder.scala
+
 package org.apache.graphar.datasources.orc
 
 import org.apache.hadoop.mapred.JobConf
diff --git a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
index 26e17ad..5c92204 100644
--- a/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
+++ b/spark/datasources-33/src/main/scala/org/apache/graphar/datasources/parquet/ParquetWriterBuilder.scala
@@ -12,10 +12,11 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- *
- * The implementation of ParquetWriteBuilder is referred from ParquetWriteBuilder of spark 3.1.1
  */
 
+// Derived from Apache Spark 3.1.1
+// https://github.com/apache/spark/blob/1d550c4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWriteBuilder.scala
+
 package org.apache.graphar.datasources.parquet
 
 import org.apache.hadoop.mapreduce.{Job, OutputCommitter, TaskAttemptContext}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to