aokolnychyi commented on a change in pull request #786: replace SparkDataFile with DataFile
URL: https://github.com/apache/incubator-iceberg/pull/786#discussion_r377402464
 
 

 ##########
 File path: spark/src/main/scala/org/apache/iceberg/spark/SparkTableUtil.scala
 ##########
 @@ -200,50 +202,6 @@ object SparkTableUtil {
    */
   case class SparkPartition(values: Map[String, String], uri: String, format: String)
 
-  /**
-   * Case class representing a data file.
-   */
-  case class SparkDataFile(
-      path: String,
-      partition: collection.Map[String, String],
-      format: String,
-      fileSize: Long,
-      rowGroupSize: Long,
-      rowCount: Long,
-      columnSizes: Array[Long],
-      valueCounts: Array[Long],
-      nullValueCounts: Array[Long],
-      lowerBounds: Seq[Array[Byte]],
-      upperBounds: Seq[Array[Byte]]
-    ) {
-
-    /**
-     * Convert this to a [[DataFile]] that can be added to a [[org.apache.iceberg.Table]].
-     *
-     * @param spec a [[PartitionSpec]] that will be used to parse the partition key
-     * @return a [[DataFile]] that can be passed to [[org.apache.iceberg.AppendFiles]]
-     */
-    def toDataFile(spec: PartitionSpec): DataFile = {
-      // values are strings, so pass a path to let the builder coerce to the right types
-      val partitionKey = spec.fields.asScala.map(_.name).map { name =>
-        s"$name=${partition(name)}"
-      }.mkString("/")
-
-      DataFiles.builder(spec)
-        .withPath(path)
-        .withFormat(format)
-        .withFileSizeInBytes(fileSize)
-        .withMetrics(new Metrics(rowCount,
-          arrayToMap(columnSizes),
-          arrayToMap(valueCounts),
-          arrayToMap(nullValueCounts),
-          arrayToMap(lowerBounds),
-          arrayToMap(upperBounds)))
-        .withPartitionPath(partitionKey)
-        .build()
-    }
-  }
-
   private def bytesMapToArray(map: java.util.Map[Integer, ByteBuffer]): Seq[Array[Byte]] = {
 
 Review comment:
   Do we still need these methods?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to