alamb commented on code in PR #5276:
URL: https://github.com/apache/arrow-rs/pull/5276#discussion_r1440568984


##########
object_store/src/client/get.rs:
##########
@@ -45,25 +50,114 @@ impl<T: GetClient> GetClientExt for T {
     async fn get_opts(&self, location: &Path, options: GetOptions) -> 
Result<GetResult> {
         let range = options.range.clone();
         let response = self.get_request(location, options).await?;
-        let meta = header_meta(location, response.headers(), 
T::HEADER_CONFIG).map_err(|e| {
-            Error::Generic {
-                store: T::STORE,
-                source: Box::new(e),
-            }
-        })?;
-
-        let stream = response
-            .bytes_stream()
-            .map_err(|source| Error::Generic {
-                store: T::STORE,
-                source: Box::new(source),
-            })
-            .boxed();
-
-        Ok(GetResult {
-            range: range.unwrap_or(0..meta.size),
-            payload: GetResultPayload::Stream(stream),
-            meta,
+        get_result::<T>(location, range, response).map_err(|e| 
crate::Error::Generic {
+            store: T::STORE,
+            source: Box::new(e),
         })
     }
 }
+
+struct ContentRange {
+    /// The range of the object returned
+    range: Range<usize>,
+    /// The total size of the object being requested
+    size: usize,
+}
+
+impl ContentRange {
+    /// Parse a content range of the form `bytes 
<range-start>-<range-end>/<size>`
+    ///
+    /// 
<https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Range>
+    fn from_str(s: &str) -> Option<Self> {
+        let rem = s.trim().strip_prefix("bytes ")?;
+        let (range, size) = rem.split_once('/')?;
+        let size = size.parse().ok()?;
+
+        let (start_s, end_s) = range.split_once('-')?;
+
+        let start = start_s.parse().ok()?;
+        let end: usize = end_s.parse().ok()?;
+
+        Some(Self {
+            size,
+            range: start..end + 1,
+        })
+    }
+}
+
+/// A specialized `Error` for get-related errors
+#[derive(Debug, Snafu)]
+#[allow(missing_docs)]
+enum GetResultError {
+    #[snafu(context(false))]
+    Header {
+        source: crate::client::header::Error,
+    },
+
+    #[snafu(display("Received non-partial response when range requested"))]
+    NotPartial,
+
+    #[snafu(display("Content-Range header not present"))]
+    NoContentRange,
+
+    #[snafu(display("Failed to parse value for CONTENT_RANGE header: 
{value}"))]
+    ParseContentRange { value: String },
+
+    #[snafu(display("Content-Range header contained non UTF-8 characters"))]
+    InvalidContentRange { source: ToStrError },
+
+    #[snafu(display("Requested {expected:?}, got {actual:?}"))]
+    UnexpectedRange {
+        expected: Range<usize>,
+        actual: Range<usize>,
+    },
+}
+
+fn get_result<T: GetClient>(
+    location: &Path,
+    range: Option<Range<usize>>,
+    response: Response,
+) -> Result<GetResult, GetResultError> {
+    let mut meta = header_meta(location, response.headers(), 
T::HEADER_CONFIG)?;
+
+    // ensure that we receive the range we asked for
+    let range = if let Some(expected) = range {
+        ensure!(
+            response.status() == StatusCode::PARTIAL_CONTENT,
+            NotPartialSnafu
+        );
+        let val = response
+            .headers()
+            .get(CONTENT_RANGE)
+            .context(NoContentRangeSnafu)?;
+
+        let value = val.to_str().context(InvalidContentRangeSnafu)?;
+        let value = 
ContentRange::from_str(value).context(ParseContentRangeSnafu { value })?;
+        let actual = value.range;
+
+        ensure!(
+            actual == expected,
+            UnexpectedRangeSnafu { expected, actual }
+        );
+
+        // Update size to reflect full size of object (#5272)
+        meta.size = value.size;
+        actual
+    } else {
+        0..meta.size
+    };
+
+    let stream = response
+        .bytes_stream()
+        .map_err(|source| Error::Generic {
+            store: T::STORE,
+            source: Box::new(source),
+        })
+        .boxed();
+
+    Ok(GetResult {
+        range,
+        meta,
+        payload: GetResultPayload::Stream(stream),
+    })
+}

Review Comment:
   It seems like there should be tests for parsing the response -- 
specifically, with / without the correct status codes and parrsing the contents 
of the content range string



##########
object_store/src/lib.rs:
##########
@@ -1303,12 +1303,22 @@ mod tests {
         let range = 3..7;
         let range_result = storage.get_range(&location, range.clone()).await;
 
+        let bytes = range_result.unwrap();
+        assert_eq!(bytes, expected_data.slice(range.clone()));
+
+        let opts = GetOptions {
+            range: Some(2..5),
+            ..Default::default()
+        };
+        let result = storage.get_opts(&location, opts).await.unwrap();
+        assert_eq!(result.meta.size, 14); // Should return full object size 
(#5272)

Review Comment:
   ```suggestion
           // Data is `"arbitrary data"`, length 14 bytes
           assert_eq!(result.meta.size, 14); // Should return full object size 
(#5272)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to