This is an automated email from the ASF dual-hosted git repository.

asolimando pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 6300c7964c [CALCITE-7321] FilesTableFunction throws 
NumberFormatException on macOS with GNU stat installed
6300c7964c is described below

commit 6300c7964c78592f4c276dc25e21fe35f80e1846
Author: Alessandro Solimando <[email protected]>
AuthorDate: Tue Dec 9 14:29:57 2025 +0100

    [CALCITE-7321] FilesTableFunction throws NumberFormatException on macOS 
with GNU stat installed
---
 .../calcite/adapter/os/FilesTableFunction.java     | 82 ++++++++++++++++++----
 1 file changed, 70 insertions(+), 12 deletions(-)

diff --git 
a/plus/src/main/java/org/apache/calcite/adapter/os/FilesTableFunction.java 
b/plus/src/main/java/org/apache/calcite/adapter/os/FilesTableFunction.java
index c7de135d49..8c7b3cc815 100644
--- a/plus/src/main/java/org/apache/calcite/adapter/os/FilesTableFunction.java
+++ b/plus/src/main/java/org/apache/calcite/adapter/os/FilesTableFunction.java
@@ -44,7 +44,8 @@
 public class FilesTableFunction {
 
   private static final BigDecimal THOUSAND = BigDecimal.valueOf(1000L);
-
+  private static final String SINGLE_QUOTE =
+      "Path with single quote characters are not supported";
   private FilesTableFunction() {
   }
 
@@ -115,9 +116,9 @@ private Enumerable<String> sourceLinux() {
 
       private Enumerable<String> sourceMacOs() {
         if (path.contains("'")) {
-          // no injection monkey business
-          throw new IllegalArgumentException();
+          throw new IllegalArgumentException(SINGLE_QUOTE);
         }
+        // BSD stat format specifiers: 
https://man.freebsd.org/cgi/man.cgi?query=stat
         final String[] args = {"/bin/sh", "-c", "find '" + path
               + "' | xargs stat -f "
               + "%a%n" // access_time
@@ -144,6 +145,49 @@ private Enumerable<String> sourceMacOs() {
         return Processes.processLines('\n', args);
       }
 
+      private Enumerable<String> sourceGnuStat() {
+        if (path.contains("'")) {
+          throw new IllegalArgumentException(SINGLE_QUOTE);
+        }
+        // GNU stat format specifiers:
+        // 
https://www.gnu.org/software/coreutils/manual/html_node/stat-invocation.html
+        // format string must have exactly 20 lines per file to match the 
schema
+        final String[] args = {"/bin/sh", "-c", "find '" + path
+              + "' | xargs stat -c '"
+              + "%X\n" // access_time
+              + "%b\n" // block_count
+              + "%Z\n" // change_time
+              + "0\n" // depth: computed later based on path
+              + "%d\n" // device
+              + "filename\n" // filename: computed later from path
+              + "%F\n" // fstype (file system type)
+              + "%G\n" // gname
+              + "%g\n" // gid
+              + "dirname\n" // dir_name: computed later from path
+              + "%i\n\n" // inode (followed by empty line for link)
+              + "%a\n" // perm
+              + "%h\n" // hard
+              + "%n\n" // path
+              + "%s\n" // size
+              + "%Y\n" // mod_time
+              + "%U\n" // user
+              + "%u\n" // uid
+              + "%F'" // type
+        };
+        return Processes.processLines('\n', args);
+      }
+
+      private boolean isGnuStat() {
+        try {
+          // BSD stat doesn't support --version, so we use this to detect GNU 
stat
+          final String[] args = {"stat", "--version"};
+          return Processes.processLines('\n', args)
+              .any(line -> line.contains("GNU coreutils"));
+        } catch (RuntimeException e) {
+          return false;
+        }
+      }
+
       @Override public Enumerable<@Nullable Object[]> scan(DataContext root) {
         JavaTypeFactory typeFactory = root.getTypeFactory();
         final RelDataType rowType = getRowType(typeFactory);
@@ -154,8 +198,9 @@ private Enumerable<String> sourceMacOs() {
         Util.discard(osVersion);
         final Enumerable<String> enumerable;
         switch (osName) {
-        case "Mac OS X": // tested on version 10.12.5
-          enumerable = sourceMacOs();
+        case "Mac OS X": // tested on versions 10.12.5 and 15.6.1
+          // detect GNU vs BSD stat and adapt the format accordingly
+          enumerable = isGnuStat() ? sourceGnuStat() : sourceMacOs();
           break;
         default:
           enumerable = sourceLinux();
@@ -187,7 +232,7 @@ private Enumerable<String> sourceMacOs() {
                 }
                 switch (osName) {
                 case "Mac OS X":
-                  // Strip leading "./"
+                  // post-process fields: compute filename, dir_name, depth 
from path
                   String path = requireNonNull((String) current[14]);
                   if (".".equals(path)) {
                     current[14] = path = "";
@@ -207,12 +252,25 @@ private Enumerable<String> sourceMacOs() {
                     current[9] = ""; // dir_name
                   }
 
-                  // Make type values more like those on Linux
-                  final String type = (String) current[19];
-                  current[19] = "/".equals(type) ? "d"
-                      : "".equals(type) || "*".equals(type) ? "f"
-                      : "@".equals(type) ? "l"
-                      : type;
+                  // detect output format: BSD outputs single chars, GNU 
outputs words
+                  final String type = requireNonNull((String) current[19]);
+                  if (type.length() > 1) {
+                    // GNU stat outputs descriptive types like "regular file", 
"directory"
+                    current[19] = type.contains("directory") ? "d"
+                        : type.contains("regular") ? "f"
+                        : type.contains("symbolic") ? "l"
+                        : type.contains("block") ? "b"
+                        : type.contains("character") ? "c"
+                        : type.contains("fifo") ? "p"
+                        : type.contains("socket") ? "s"
+                        : "?";
+                  } else {
+                    // BSD stat outputs single characters: "/" "*" "@" or ""
+                    current[19] = "/".equals(type) ? "d"
+                        : "".equals(type) || "*".equals(type) ? "f"
+                        : "@".equals(type) ? "l"
+                        : type;
+                  }
                   break;
                 default:
                   break;

Reply via email to