jiayuasu commented on code in PR #2673:
URL: https://github.com/apache/sedona/pull/2673#discussion_r2851395005
##########
spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/raster/RasterOptions.scala:
##########
@@ -20,16 +20,52 @@ package org.apache.spark.sql.sedona_sql.io.raster
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
-private[io] class RasterOptions(@transient private val parameters:
CaseInsensitiveMap[String])
+class RasterOptions(@transient private val parameters:
CaseInsensitiveMap[String])
extends Serializable {
def this(parameters: Map[String, String]) =
this(CaseInsensitiveMap(parameters))
+ // The following options are used to read raster data
+
+ /**
+ * Whether to retile the raster data. If true, the raster data will be
retiled into smaller
+ * tiles. If false, the raster data will be read as a single tile.
+ */
+ val retile: Boolean = parameters.getOrElse("retile", "true").toBoolean
+
+ /**
+ * The width of the tile. This is only effective when retile is true. If
retile is true and
+ * tileWidth is not set, the default value is the width of the internal
tiles in the raster
+ * files. Each raster file may have different internal tile sizes.
+ */
+ val tileWidth: Option[Int] = parameters.get("tileWidth").map(_.toInt)
+
+ /**
+ * The height of the tile. This is only effective when retile is true. If
retile is true and
+ * tileHeight is not set, the default value is the same as tileWidth. If
tileHeight is set,
+ * tileWidth must be set as well.
+ */
+ val tileHeight: Option[Int] = parameters
+ .get("tileHeight")
+ .map { value =>
+ require(tileWidth.isDefined, "tileWidth must be set when tileHeight is
set")
+ value.toInt
+ }
+ .orElse(tileWidth)
+
Review Comment:
Fixed
##########
common/src/main/java/org/apache/sedona/common/raster/inputstream/HadoopImageInputStream.java:
##########
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.raster.inputstream;
+
+import java.io.IOException;
+import javax.imageio.stream.ImageInputStreamImpl;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/** An ImageInputStream that reads image data from a Hadoop FileSystem. */
+public class HadoopImageInputStream extends ImageInputStreamImpl {
+
+ private final FSDataInputStream stream;
+ private final Path path;
+ private final Configuration conf;
+
+ public HadoopImageInputStream(Path path, Configuration conf) throws
IOException {
+ FileSystem fs = path.getFileSystem(conf);
+ stream = fs.open(path);
+ this.path = path;
+ this.conf = conf;
+ }
+
+ public HadoopImageInputStream(Path path) throws IOException {
+ this(path, new Configuration());
+ }
+
+ public HadoopImageInputStream(FSDataInputStream stream) {
+ this.stream = stream;
+ this.path = null;
+ this.conf = null;
+ }
+
+ public Path getPath() {
+ return path;
+ }
+
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ stream.close();
+ }
+
+ @Override
+ public int read() throws IOException {
+ byte[] buf = new byte[1];
+ int ret_len = read(buf, 0, 1);
+ if (ret_len < 0) {
+ return ret_len;
+ }
+ return buf[0] & 0xFF;
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ checkClosed();
+ bitOffset = 0;
+
+ if (len == 0) {
+ return 0;
+ }
+
+ // stream.read may return fewer bytes than requested, so we need to loop
until we get all the
+ // data, or hit the end of the stream. We cannot simply perform an
incomplete read and return
+ // the number of bytes actually read, since the methods in
ImageInputStreamImpl such as
+ // readInt() rely on this method and assume that partial reads only
happen when reaching
+ // EOF. This might be a bug in imageio since they should invoke
+ // readFully() in such cases.
+ int remaining = len;
+ while (remaining > 0) {
+ int ret_len = stream.read(b, off, remaining);
+ if (ret_len < 0) {
+ // Hit EOF, no more data to read.
Review Comment:
Fixed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]