clbarnes commented on code in PR #5222:
URL: https://github.com/apache/arrow-rs/pull/5222#discussion_r1439560391


##########
object_store/src/util.rs:
##########
@@ -167,6 +178,501 @@ fn merge_ranges(ranges: &[std::ops::Range<usize>], 
coalesce: usize) -> Vec<std::
     ret
 }
 
+/// A single range in a `Range` request.
+///
+/// These can be created from [usize] ranges, like
+///
+/// ```rust
+/// # use byteranges::request::HttpRange;
+/// let range1: HttpRange = (50..150).into();
+/// let range2: HttpRange = (50..=150).into();
+/// let range3: HttpRange = (50..).into();
+/// let range4: HttpRange = (..150).into();
+/// ```
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub enum GetRange {
+    /// A range with a given first and last bytes.
+    Bounded(Range<usize>),
+    /// A range defined only by the first byte requested (requests all 
remaining bytes).
+    Offset(usize),
+    /// A range defined as the number of bytes at the end of the resource.
+    Suffix(usize),
+}
+
+impl PartialOrd for GetRange {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        use std::cmp::Ordering::*;
+        use GetRange::*;
+        // `Suffix`es cannot be compared with `Range`s or `Offset`s.
+        // `Range`s and `Offset`s are compared by their first byte,
+        // using the last byte as a tiebreaker (`Offset`s always greater than 
`Range`s).
+        match self {
+            Bounded(r1) => match other {
+                Bounded(r2) => match r1.start.cmp(&r2.start) {
+                    Equal => Some(r1.end.cmp(&r2.end)),
+                    o => Some(o),
+                },
+                Offset(f2) => match r1.start.cmp(f2) {
+                    Equal => Some(Less),
+                    o => Some(o),
+                },
+                Suffix { .. } => None,
+            },
+            Offset(f1) => match other {
+                Bounded(r2) => match f1.cmp(&r2.start) {
+                    Equal => Some(Greater),
+                    o => Some(o),
+                },
+                Offset(f2) => Some(f1.cmp(f2)),
+                Suffix { .. } => None,
+            },
+            Suffix(b1) => match other {
+                Suffix(b2) => Some(b2.cmp(b1)),
+                _ => None,
+            },
+        }
+    }
+}
+
+#[derive(Debug, Snafu)]
+pub enum RangeMergeError {
+    #[snafu(display("Ranges could not be merged because exactly one was a 
suffix"))]
+    DifferentTypes,
+    #[snafu(display("Ranges could not be merged because they were too far 
apart"))]
+    Spread,
+}
+
+#[derive(Debug, Snafu)]
+pub enum InvalidGetRange {
+    #[snafu(display("Wanted suffix with {expected}B, resource was {actual}B 
long"))]
+    SuffixTooLarge { expected: usize, actual: usize },
+
+    #[snafu(display("Wanted range starting at {expected}, resource was 
{actual}B long"))]
+    StartTooLarge { expected: usize, actual: usize },
+
+    #[snafu(display("Wanted range ending at {expected}, resource was {actual}B 
long"))]
+    EndTooLarge { expected: usize, actual: usize },
+
+    #[snafu(display("Range started at {start} and ended at {end}"))]
+    Inconsistent { start: usize, end: usize },
+}
+
+impl GetRange {
+    /// Create a range representing the whole resource.
+    pub fn new_whole() -> Self {
+        Self::Offset(0)
+    }
+
+    /// Whether this is an offset.
+    pub fn is_offset(&self) -> bool {
+        match self {
+            GetRange::Offset(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Whether this is a range.
+    pub fn is_range(&self) -> bool {
+        match self {
+            GetRange::Bounded(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Whether this is an offset.
+    pub fn is_suffix(&self) -> bool {
+        match self {
+            GetRange::Suffix(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Whether the range has no bytes in it (i.e. the last byte is before the 
first).
+    ///
+    /// [None] if the range is an `Offset` or `Suffix`.
+    /// The response may still be empty.
+    pub fn is_empty(&self) -> Option<bool> {
+        match self {
+            GetRange::Bounded(r) => Some(r.is_empty()),
+            _ => None,
+        }
+    }
+
+    /// Whether the range represents the entire resource (i.e. it is an 
`Offset` of 0).
+    ///
+    /// [None] if the range is a `Range` or `Suffix`.
+    /// The response may still be the full resource.
+    pub fn is_whole(&self) -> Option<bool> {
+        match self {
+            GetRange::Offset(first) => Some(first == &0),
+            _ => None,
+        }
+    }
+
+    /// How many bytes the range is requesting.
+    ///
+    /// Note that the server may respond with a different number of bytes,
+    /// depending on the length of the resource and other behaviour.
+    pub fn nbytes(&self) -> Option<usize> {
+        match self {
+            GetRange::Bounded(r) => Some(r.end.saturating_sub(r.start)),
+            GetRange::Offset(_) => None,
+            GetRange::Suffix(n) => Some(*n),
+        }
+    }
+
+    /// The index of the first byte requested ([None] for suffix).
+    pub fn first_byte(&self) -> Option<usize> {
+        match self {
+            GetRange::Bounded(r) => Some(r.start),
+            GetRange::Offset(o) => Some(*o),
+            GetRange::Suffix(_) => None,
+        }
+    }
+
+    /// The index of the last byte requested ([None] for offset or suffix).
+    pub fn last_byte(&self) -> Option<usize> {
+        match self {
+            GetRange::Bounded(r) => match r.end {
+                0 => None,
+                n => Some(n),
+            },
+            GetRange::Offset { .. } => None,
+            GetRange::Suffix { .. } => None,
+        }
+    }
+
+    pub(crate) fn as_range(&self, len: usize) -> Result<Range<usize>, 
InvalidGetRange> {
+        match self {
+            Self::Bounded(r) => {
+                if r.start >= len {
+                    Err(InvalidGetRange::StartTooLarge {
+                        expected: r.start,
+                        actual: len,
+                    })
+                } else if r.end <= r.start {
+                    Err(InvalidGetRange::Inconsistent {
+                        start: r.start,
+                        end: r.end,
+                    })
+                } else if r.end >= len {
+                    Err(InvalidGetRange::EndTooLarge {
+                        expected: r.end,
+                        actual: len,
+                    })
+                } else {
+                    Ok(r.clone())
+                }
+            }
+            Self::Offset(o) => {
+                if o >= &len {
+                    Err(InvalidGetRange::StartTooLarge {
+                        expected: *o,
+                        actual: len,
+                    })
+                } else {
+                    Ok(*o..len)
+                }
+            }
+            Self::Suffix(n) => {
+                len.checked_sub(*n)
+                    .map(|start| start..len)
+                    .ok_or(InvalidGetRange::SuffixTooLarge {
+                        expected: *n,
+                        actual: len,
+                    })
+            }
+        }
+    }
+
+    /// Merge two ranges which fall within a certain distance `coalesce` of 
each other.
+    ///
+    /// Error if exactly one is a suffix or if the ranges are too far apart.
+    pub(crate) fn try_merge(
+        &self,
+        other: &GetRange,
+        coalesce: usize,
+    ) -> Result<Self, RangeMergeError> {
+        use GetRange::*;
+
+        let (g1, g2) = match self.partial_cmp(other) {
+            None => {
+                // One is a suffix, the other isn't.
+                // This is escapable if one represents the whole resource.
+                if let Some(whole) = self.is_whole() {
+                    if whole {
+                        return Ok(GetRange::new_whole());
+                    }
+                }
+                if let Some(whole) = other.is_whole() {
+                    if whole {
+                        return Ok(GetRange::new_whole());
+                    }
+                }
+                return Err(RangeMergeError::DifferentTypes);
+            }
+            Some(o) => match o {
+                std::cmp::Ordering::Greater => (other, self),
+                _ => (self, other),
+            },
+        };
+
+        match g1 {
+            Bounded(r1) => match g2 {
+                Bounded(r2) => {
+                    if r2.start <= r1.end.saturating_add(coalesce) {
+                        Ok(GetRange::Bounded(r1.start..r1.end.max(r2.end)))
+                    } else {
+                        Err(RangeMergeError::Spread)
+                    }
+                }
+                Offset(first) => {
+                    if first < &(r1.end.saturating_add(coalesce)) {
+                        Ok(GetRange::Offset(r1.start))
+                    } else {
+                        Err(RangeMergeError::Spread)
+                    }
+                }
+                Suffix { .. } => unreachable!(),
+            },
+            // Either an offset or suffix, both of which would contain the 
second range.
+            _ => Ok(g1.clone()),
+        }
+    }
+
+    pub fn matches_range(&self, range: Range<usize>, len: usize) -> bool {
+        match self {
+            Self::Bounded(r) => r == &range,
+            Self::Offset(o) => o == &range.start && len == range.end,
+            Self::Suffix(n) => range.end == len && range.start == len - n,
+        }
+    }
+}
+
+/// Returns a sorted [Vec] of [HttpRange::Offset] and [HttpRange::Range] that 
cover `ranges`,
+/// and a single [HttpRange::Suffix] if one or more are given.
+/// The suffix is also omitted if any of the ranges is the whole resource 
(`0-`).
+///
+/// The suffix is returned separately as it may still overlap with the other 
ranges,
+/// so the caller may want to handle it differently.
+pub fn merge_get_ranges<T: Into<GetRange> + Clone>(

Review Comment:
   This was written in anticipation of `GetRange`s being used under the hood 
for all of the range-fetching machinery (as I intend to do that in a separate 
library); it can certainly be omitted here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to