scovich commented on code in PR #9012:
URL: https://github.com/apache/arrow-rs/pull/9012#discussion_r2636626718


##########
parquet-variant/src/path.rs:
##########
@@ -112,11 +114,7 @@ impl<'a> From<Vec<VariantPathElement<'a>>> for 
VariantPath<'a> {
 /// Create from &str with support for dot notation
 impl<'a> From<&'a str> for VariantPath<'a> {
     fn from(path: &'a str) -> Self {
-        if path.is_empty() {
-            VariantPath::new(vec![])
-        } else {
-            VariantPath::new(path.split('.').map(Into::into).collect())
-        }
+        VariantPath::new(path.split(".").flat_map(parse_path).collect())

Review Comment:
   A bit of quality time with an LLM produced the following (along with a bunch 
of unit tests to validate it works):
   
   <details>
   
   ```rust
   /// Fallibly builds a [`VariantPath`] from a string that uses dot notation for
   /// fields and `[n]` for array indices.
   ///
   /// The string is handed to `parse_path`; any error it reports is returned
   /// unchanged.
   ///
   /// # Example
   /// ```
   /// # use parquet_variant::VariantPath;
   /// let path: VariantPath = "foo.bar[0]".try_into().unwrap();
   /// ```
   impl<'a> TryFrom<&'a str> for VariantPath<'a> {
       type Error = ArrowError;

       fn try_from(path: &'a str) -> Result<Self, Self::Error> {
           let elements = parse_path(path)?;
           Ok(VariantPath::new(elements))
       }
   }
   
   /// Parse a path string into a vector of [`VariantPathElement`].
   ///
   /// Supports the following syntax:
   /// - `""` - empty path
   /// - `"foo"` - single field
   /// - `"foo.bar"` - nested fields
   /// - `"[1]"` - single array index
   /// - `"[1][2]"` - multiple array indices
   /// - `"foo[1]"` - field with array index
   /// - `"foo[1][2]"` - field with multiple array indices
   /// - `"foo[1].bar"` - mixed field and array access
   /// - etc.
   ///
   /// Field names can contain any characters except `.`, `[`, and `]`.
   ///
   /// # Errors
   ///
   /// Returns [`ArrowError::ParseError`] when the path starts or ends with `.`,
   /// when a `.` is immediately followed by `.`, `[` or `]`, when a `]` appears
   /// without a matching `[`, or when `parse_index` rejects a bracketed index.
   fn parse_path(s: &str) -> Result<Vec<VariantPathElement<'_>>, ArrowError> {
       // Byte offset of the first `.`, `[` or `]` at or after `start`, or
       // `s.len()` when the rest of the string contains none of them.
       let scan_field = |start: usize| {
           s[start..]
               .find(['.', '[', ']'])
               .map_or_else(|| s.len(), |p| start + p)
       };

       let mut elements = Vec::new();
       let bytes = s.as_bytes();
       let mut i = 0;

       while i < bytes.len() {
           let (elem, end) = match bytes[i] {
               b'.' if i == 0 => {
                   // Keep the message on one line: a line break inside the
                   // literal would become part of the error text.
                   return Err(ArrowError::ParseError(
                       "unexpected leading '.'".into(),
                   ));
               }
               b'.' => {
                   i += 1; // skip the dot; a field must follow
                   let end = scan_field(i);
                   if end == i {
                       // Either the string ended right after the dot, or the
                       // next byte is one of the ASCII delimiters, so the
                       // `as char` conversion below is lossless.
                       return Err(ArrowError::ParseError(match bytes.get(i) {
                           None => "path cannot end with '.'".into(),
                           Some(&c) => {
                               format!("unexpected '{}' at byte {i}", c as char)
                           }
                       }));
                   }
                   (VariantPathElement::field(&s[i..end]), end)
               }
               b'[' => {
                   let (idx, end) = parse_index(s, i)?;
                   (VariantPathElement::index(idx), end)
               }
               b']' => {
                   return Err(ArrowError::ParseError(format!(
                       "unexpected ']' at byte {i}"
                   )));
               }
               _ => {
                   // Reached at the start of the path or directly after a
                   // `]`; a field scan always stops on a delimiter, so this
                   // arm never follows another field.
                   let end = scan_field(i);
                   (VariantPathElement::field(&s[i..end]), end)
               }
           };
           elements.push(elem);
           i = end;
       }

       Ok(elements)
   }
   
   
   /// Parse `[digits]` starting at `i` (which points to `[`).
   ///
   /// `i` is the byte offset of the opening `[` within `s`. On success returns
   /// `(index_value, position after ']')`, where the position is a byte offset
   /// into `s`.
   ///
   /// # Errors
   ///
   /// Returns [`ArrowError::ParseError`] when no closing `]` follows, or when the
   /// text between the brackets is not a non-empty run of ASCII digits that fits
   /// in a `usize`.
   fn parse_index(s: &str, i: usize) -> Result<(usize, usize), ArrowError> {
       let start = i + 1; // skip '['

       // Find closing ']'
       let end = s[start..]
           .find(']')
           .map(|p| start + p)
           .ok_or_else(|| ArrowError::ParseError(format!("unterminated '[' at byte {i}")))?;

       let digits = &s[start..end];
       let invalid = || ArrowError::ParseError(format!("invalid index at byte {start}"));

       // `str::parse::<usize>` tolerates a leading '+', which is not part of the
       // `[digits]` syntax, so reject anything that is not an ASCII digit first.
       // The empty string passes this check vacuously and is rejected by `parse`.
       if !digits.bytes().all(|b| b.is_ascii_digit()) {
           return Err(invalid());
       }
       let idx = digits.parse().map_err(|_| invalid())?;

       Ok((idx, end + 1))
   }
   ```
   
   </details>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to