Github user WeichenXu123 commented on a diff in the pull request:
https://github.com/apache/spark/pull/22328#discussion_r215138862
--- Diff:
mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala
---
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.source.image
+
+import java.nio.file.Paths
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.ml.image.ImageSchema._
+import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.functions.{col, substring_index}
+
+class ImageFileFormatSuite extends SparkFunSuite with
MLlibTestSparkContext {
+
+ // Single column of images named "image"
+ private lazy val imagePath = "../data/mllib/images/imagesWithPartitions"
+
+ test("image datasource count test") {
+ val df1 = spark.read.format("image").load(imagePath)
+ assert(df1.count === 9)
+
+ val df2 = spark.read.format("image").option("dropImageFailures",
"true").load(imagePath)
+ assert(df2.count === 8)
+ }
+
+ test("image datasource test: read jpg image") {
+ val df = spark.read.format("image").load(imagePath +
"/cls=kittens/date=2018-02/DP153539.jpg")
+ assert(df.count() === 1)
+ }
+
+ test("image datasource test: read png image") {
+ val df = spark.read.format("image").load(imagePath +
"/cls=multichannel/date=2018-01/BGRA.png")
+ assert(df.count() === 1)
+ }
+
+ test("image datasource test: read non image") {
+ val filePath = imagePath + "/cls=kittens/date=2018-01/not-image.txt"
+ val df = spark.read.format("image").option("dropImageFailures", "true")
+ .load(filePath)
+ assert(df.count() === 0)
+
+ val df2 = spark.read.format("image").option("dropImageFailures",
"false")
+ .load(filePath)
+ assert(df2.count() === 1)
+ val result = df2.head()
+ assert(result === invalidImageRow(
+ Paths.get(filePath).toAbsolutePath().normalize().toUri().toString))
+ }
+
+ test("image datasource partition test") {
+ val result = spark.read.format("image")
+ .option("dropImageFailures", "true").load(imagePath)
+ .select(substring_index(col("image.origin"), "/", -1).as("origin"),
col("cls"), col("date"))
+ .collect()
+
+ assert(Set(result: _*) === Set(
+ Row("29.5.a_b_EGDP022204.jpg", "kittens", "2018-01"),
+ Row("54893.jpg", "kittens", "2018-02"),
+ Row("DP153539.jpg", "kittens", "2018-02"),
+ Row("DP802813.jpg", "kittens", "2018-02"),
+ Row("BGRA.png", "multichannel", "2018-01"),
+ Row("BGRA_alpha_60.png", "multichannel", "2018-01"),
+ Row("chr30.4.184.jpg", "multichannel", "2018-02"),
+ Row("grayscale.jpg", "multichannel", "2018-02")
+ ))
+ }
+
+ // Images with the different number of channels
+ test("readImages pixel values test") {
+
+ val images = spark.read.format("image").option("dropImageFailures",
"true")
+ .load(imagePath + "/cls=multichannel/").collect()
+
+ val firstBytes20Map = images.map { rrow =>
+ val row = rrow.getAs[Row]("image")
+ val filename = Paths.get(getOrigin(row)).getFileName().toString()
+ val mode = getMode(row)
+ val bytes20 = getData(row).slice(0, 20).toList
+ filename -> Tuple2(mode, bytes20)
--- End diff --
yea, `(mode, bytes20)` doesn't work, `filename -> (mode, bytes20)` will be
compiled as `filename.->(mode, bytes20)` and `->` receive 2 arguments and
compile error occurs.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]