HyukjinKwon commented on a change in pull request #25245: 
[SPARK-25382][SQL][PYSPARK] Remove ImageSchema.readImages in 3.0
URL: https://github.com/apache/spark/pull/25245#discussion_r308999486
 
 

 ##########
 File path: python/pyspark/ml/tests/test_image.py
 ##########
 @@ -24,18 +24,19 @@
 from pyspark.testing.utils import QuietTest
 
 
-class ImageReaderTest(SparkSessionTestCase):
+class ImageFileFormatTest(SparkSessionTestCase):
 
     def test_read_images(self):
         data_path = 'data/mllib/images/origin/kittens'
-        df = ImageSchema.readImages(data_path, recursive=True, dropImageFailures=True)
+        df = self.spark.read.format("image") \
+            .option("dropInvalid", True) \
+            .option("recursiveFileLookup", True) \
+            .load(data_path)
         self.assertEqual(df.count(), 4)
         first_row = df.take(1)[0][0]
         array = ImageSchema.toNDArray(first_row)
         self.assertEqual(len(array), first_row[1])
         self.assertEqual(ImageSchema.toImage(array, origin=first_row[0]), first_row)
-        self.assertEqual(df.schema, ImageSchema.imageSchema)
-        self.assertEqual(df.schema["image"].dataType, ImageSchema.columnSchema)
 
 Review comment:
   If the nullability difference is the concern here, we can just compare the schemas' `simpleString` instead.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to