Copilot commented on code in PR #2653:
URL: https://github.com/apache/sedona/pull/2653#discussion_r2808694273
##########
spark/spark-4.0/src/test/scala/org/apache/sedona/sql/ShapefileTests.scala:
##########
@@ -517,18 +518,6 @@ class ShapefileTests extends TestBaseScala with
BeforeAndAfterAll {
assert(row.getAs[String]("aUnicode") == s"测试$id")
}
}
Review Comment:
The removal of the partition filter test code appears unrelated to adding
_metadata support. This test verified that partition filters work correctly by
filtering on the 'part' column. Unless there's a specific reason this test is
now redundant or broken, it should be retained to maintain test coverage for
partition filtering functionality.
```suggestion
}
// Verify that partition filters on the 'part' column work correctly.
val part1Df = shapefileDf.filter("part = 1")
val part2Df = shapefileDf.filter("part = 2")
val part1Rows = part1Df.collect()
val part2Rows = part2Df.collect()
// All rows in each filtered DataFrame should have the expected partition value.
assert(part1Rows.forall(_.getAs[Int]("part") == 1))
assert(part2Rows.forall(_.getAs[Int]("part") == 2))
// The filtered partitions together should cover all rows.
assert(part1Rows.length + part2Rows.length == rows.length)
```
##########
spark/spark-3.4/src/test/scala/org/apache/sedona/sql/ShapefileTests.scala:
##########
@@ -20,7 +20,8 @@ package org.apache.sedona.sql
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
-import org.apache.spark.sql.types.{DateType, DecimalType, LongType, StringType, StructField, StructType}
+import org.apache.spark.sql.Row
Review Comment:
The import statement for org.apache.spark.sql.Row is added but never used in
the test file. Consider removing this unused import to keep the code clean.
```suggestion
```
##########
spark/spark-3.5/src/test/scala/org/apache/sedona/sql/ShapefileTests.scala:
##########
@@ -517,18 +518,6 @@ class ShapefileTests extends TestBaseScala with
BeforeAndAfterAll {
assert(row.getAs[String]("aUnicode") == s"测试$id")
}
}
-
- // Using partition filters
- rows = shapefileDf.where("part = 2").collect()
- assert(rows.length == 4)
- rows.foreach { row =>
- assert(row.getAs[Geometry]("geometry").isInstanceOf[Point])
- assert(row.getAs[Int]("part") == 2)
- val id = row.getAs[Long]("id")
- assert(id > 10)
- assert(row.getAs[Long]("aInt") == id)
- assert(row.getAs[String]("aUnicode") == s"测试$id")
- }
}
Review Comment:
The removal of the partition filter test code appears unrelated to adding
_metadata support. This test verified that partition filters work correctly by
filtering on the 'part' column. Unless there's a specific reason this test is
now redundant or broken, it should be retained to maintain test coverage for
partition filtering functionality.
```suggestion
it("read partitioned directory with partition filter") {
FileUtils.cleanDirectory(new File(temporaryLocation))
Files.createDirectory(new File(temporaryLocation + "/part=1").toPath)
Files.createDirectory(new File(temporaryLocation + "/part=2").toPath)
FileUtils.copyFile(
new File(resourceFolder + "shapefiles/datatypes/datatypes1.shp"),
new File(temporaryLocation + "/part=1/datatypes1.shp"))
FileUtils.copyFile(
new File(resourceFolder + "shapefiles/datatypes/datatypes1.dbf"),
new File(temporaryLocation + "/part=1/datatypes1.dbf"))
FileUtils.copyFile(
new File(resourceFolder + "shapefiles/datatypes/datatypes1.cpg"),
new File(temporaryLocation + "/part=1/datatypes1.cpg"))
FileUtils.copyFile(
new File(resourceFolder + "shapefiles/datatypes/datatypes2.shp"),
new File(temporaryLocation + "/part=2/datatypes2.shp"))
FileUtils.copyFile(
new File(resourceFolder + "shapefiles/datatypes/datatypes2.dbf"),
new File(temporaryLocation + "/part=2/datatypes2.dbf"))
FileUtils.copyFile(
new File(resourceFolder + "shapefiles/datatypes/datatypes2.cpg"),
new File(temporaryLocation + "/part=2/datatypes2.cpg"))
val shapefileDf = sparkSession.read
.format("shapefile")
.load(temporaryLocation)
.where("part = 1")
.select("part", "id", "aInt", "aUnicode", "geometry")
val rows = shapefileDf.collect()
assert(rows.nonEmpty)
rows.foreach { row =>
assert(row.getAs[Geometry]("geometry").isInstanceOf[Point])
val id = row.getAs[Long]("id")
assert(row.getAs[Int]("part") == 1)
assert(row.getAs[Long]("aInt") == id)
if (id > 0) {
assert(row.getAs[String]("aUnicode") == s"测试$id")
}
}
}
```
##########
spark/spark-4.1/src/test/scala/org/apache/sedona/sql/ShapefileTests.scala:
##########
@@ -517,18 +518,6 @@ class ShapefileTests extends TestBaseScala with
BeforeAndAfterAll {
assert(row.getAs[String]("aUnicode") == s"测试$id")
}
}
Review Comment:
The removal of the partition filter test code (lines 522-533 in the original
file) appears unrelated to adding _metadata support. This test verified that
partition filters work correctly by filtering on the 'part' column. Unless
there's a specific reason this test is now redundant or broken, it should be
retained to maintain test coverage for partition filtering functionality.
```suggestion
}
// Verify that partition filters on the 'part' column work correctly.
val part1Df = shapefileDf.where("part = 1")
val part1Rows = part1Df.collect()
// Expect at least one row in partition 1 and fewer rows than the total.
assert(part1Rows.nonEmpty)
assert(part1Rows.length < rows.length)
part1Rows.foreach { row =>
assert(row.getAs[Int]("part") == 1)
}
```
##########
spark/spark-4.1/src/test/scala/org/apache/sedona/sql/ShapefileTests.scala:
##########
@@ -20,7 +20,8 @@ package org.apache.sedona.sql
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
-import org.apache.spark.sql.types.{DateType, DecimalType, LongType, StringType, StructField, StructType}
+import org.apache.spark.sql.Row
Review Comment:
The import statement for org.apache.spark.sql.Row is added but never used in
the test file. Consider removing this unused import to keep the code clean.
```suggestion
```
##########
spark/spark-3.5/src/test/scala/org/apache/sedona/sql/ShapefileTests.scala:
##########
@@ -20,7 +20,8 @@ package org.apache.sedona.sql
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
-import org.apache.spark.sql.types.{DateType, DecimalType, LongType, StringType, StructField, StructType}
+import org.apache.spark.sql.Row
Review Comment:
The import statement for org.apache.spark.sql.Row is added but never used in
the test file. Consider removing this unused import to keep the code clean.
```suggestion
```
##########
spark/spark-4.0/src/test/scala/org/apache/sedona/sql/ShapefileTests.scala:
##########
@@ -20,7 +20,8 @@ package org.apache.sedona.sql
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
-import org.apache.spark.sql.types.{DateType, DecimalType, LongType, StringType, StructField, StructType}
+import org.apache.spark.sql.Row
Review Comment:
The import statement for org.apache.spark.sql.Row is added but never used in
the test file. Consider removing this unused import to keep the code clean.
```suggestion
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]