[Libguestfs] [nbdkit PATCH 1/4] human-size: Add human_size_parse_substr

Eric Blake via Libguestfs Fri, 30 May 2025 15:36:00 -0700

I want to teach the data plugin how to parse @4k out of a larger input
string, but that entails the need for a way to opt in to not erroring
out when an unrecognized suffix is present.


Some of the existing unit tests expose cases where parsing a value
substring out of a larger string can have interesting results; but in
the case of the data plugin, the caller will validate that any
remaining suffix starts with whitespace (and not '.' or a letter),
which should avoid the oddest cases due to the additional context that
the unit test lacks.

Signed-off-by: Eric Blake <ebl...@redhat.com>
---
 common/include/human-size-test-cases.h | 83 ++++++++++++++------------
 common/include/human-size.h            | 57 ++++++++++++------
 common/include/test-human-size.c       | 52 ++++++++++++++--
 server/test-public.c                   | 13 ++--
 4 files changed, 137 insertions(+), 68 deletions(-)

diff --git a/common/include/human-size-test-cases.h 
b/common/include/human-size-test-cases.h
index f351f7c5..ef845f4c 100644
--- a/common/include/human-size-test-cases.h
+++ b/common/include/human-size-test-cases.h
@@ -38,17 +38,22 @@
 /* Just some common test cases shared (in nbdkit) between
  * common/include/test-human-size.c and server/test-public.c
  */
-static struct pair {
+static struct tuple {
   const char *str;
   int64_t res;
-} pairs[] = {
+  const char *tail;
+} tuples[] = {
   /* Bogus strings */
   { "", -1 },
-  { "0x0", -1 },
+  { " ", -1 },
+  { "+ 1", -1 },
   { "garbage", -1 },
-  { "0garbage", -1 },
-  { "8E", -1 },
-  { "8192P", -1 },
+
+  /* Extracting substring value from larger string */
+  { "0junk", 0, "junk" }, /* no suffix j */
+  { "0garbage", 0, "arbage" }, /* 0g recognized */
+  { "1 1", 1, " 1" },
+  { "1M 1", 1024 * 1024, " 1" },

   /* Strings leading to overflow */
   { "9223372036854775808", -1 }, /* INT_MAX + 1 */
@@ -56,6 +61,8 @@ static struct pair {
   { "18446744073709551615", -1 }, /* UINT64_MAX */
   { "18446744073709551616", -1 }, /* UINT64_MAX + 1 */
   { "999999999999999999999999", -1 },
+  { "8E", -1 },
+  { "8192P", -1 },

   /* Strings representing negative values */
   { "-1", -1 },
@@ -68,39 +75,41 @@ static struct pair {
   { "-18446744073709551614", -1 }, /* -UINT64_MAX + 1 */

   /* Strings we may want to support in the future */
-  { "M", -1 },
-  { "1MB", -1 },
-  { "1MiB", -1 },
-  { "1.5M", -1 },
+  { "0x0", 0, "x0" }, /* Nicer would be 0 with tail "" */
+  { "M", -1 }, /* Nicer would be 1024 * 1024 with tail "" */
+  { "1MB", 1024 * 1024, "B" }, /* Nicer would be 1000 * 1000 with tail "" */
+  { "1MiB", 1024 * 1024, "iB" }, /* Nicer would be 1024 * 1024 with tail "" */
+  { "1.5M", 1, ".5M" }, /* Nicer would be 512 * 3 * 1024 with tail "" */
+  { "1.5MB", 1, ".5MB" }, /* Nicer would be 1500 * 1000 with tail "" */

   /* Valid strings */
-  { "-0", 0 },
-  { "0", 0 },
-  { "+0", 0 },
-  { " 08", 8 },
-  { "1", 1 },
-  { "+1", 1 },
-  { "1234567890", 1234567890 },
-  { "+1234567890", 1234567890 },
-  { "9223372036854775807", INT64_MAX },
-  { "1s", 512 },
-  { "2S", 1024 },
-  { "1b", 1 },
-  { "1B", 1 },
-  { "1k", 1024 },
-  { "1K", 1024 },
-  { "1m", 1024 * 1024 },
-  { "1M", 1024 * 1024 },
-  { "+1M", 1024 * 1024 },
-  { "1g", 1024 * 1024 * 1024 },
-  { "1G", 1024 * 1024 * 1024 },
-  { "1t", 1024LL * 1024 * 1024 * 1024 },
-  { "1T", 1024LL * 1024 * 1024 * 1024 },
-  { "1p", 1024LL * 1024 * 1024 * 1024 * 1024 },
-  { "1P", 1024LL * 1024 * 1024 * 1024 * 1024 },
-  { "8191p", 1024LL * 1024 * 1024 * 1024 * 1024 * 8191 },
-  { "1e", 1024LL * 1024 * 1024 * 1024 * 1024 * 1024 },
-  { "1E", 1024LL * 1024 * 1024 * 1024 * 1024 * 1024 },
+  { " -0", 0, "" },
+  { "0", 0, "" },
+  { "+0", 0, "" },
+  { " 08", 8, "" },
+  { "1", 1, "" },
+  { "+1", 1, "" },
+  { "1234567890", 1234567890, "" },
+  { "+1234567890", 1234567890, "" },
+  { "9223372036854775807", INT64_MAX, "" },
+  { "1s", 512, "" },
+  { "2S", 1024, "" },
+  { "1b", 1, "" },
+  { "1B", 1, "" },
+  { "1k", 1024, "" },
+  { "1K", 1024, "" },
+  { "1m", 1024 * 1024, "" },
+  { "1M", 1024 * 1024, "" },
+  { "+1M", 1024 * 1024, "" },
+  { "1g", 1024 * 1024 * 1024, "" },
+  { "1G", 1024 * 1024 * 1024, "" },
+  { "1t", 1024LL * 1024 * 1024 * 1024, "" },
+  { "1T", 1024LL * 1024 * 1024 * 1024, "" },
+  { "1p", 1024LL * 1024 * 1024 * 1024 * 1024, "" },
+  { "1P", 1024LL * 1024 * 1024 * 1024 * 1024, "" },
+  { "8191p", 1024LL * 1024 * 1024 * 1024 * 1024 * 8191, "" },
+  { "1e", 1024LL * 1024 * 1024 * 1024 * 1024 * 1024, "" },
+  { "1E", 1024LL * 1024 * 1024 * 1024 * 1024 * 1024, "" },
 };

 #endif /* NBDKIT_HUMAN_SIZE_TEST_CASES_H */
diff --git a/common/include/human-size.h b/common/include/human-size.h
index 788dbd7b..aa551888 100644
--- a/common/include/human-size.h
+++ b/common/include/human-size.h
@@ -39,14 +39,17 @@

 /* Attempt to parse a string with a possible scaling suffix, such as
  * "2M".  Disk sizes cannot usefully exceed off_t (which is signed)
- * and cannot be negative.
+ * and cannot be negative.  If rest is not NULL, the number being
+ * parsed is treated as a substring within a larger input; if a value
+ * was parsed, *rest is set to the first unparsed byte of str.  If
+ * rest is NULL, any trailing garbage is treated as an error.
  *
  * On error, returns -1 and sets *error and *pstr.  You can form a
  * final error message by appending "<error>: <pstr>".
  */
 static inline int64_t
-human_size_parse (const char *str,
-                  const char **error, const char **pstr)
+human_size_parse_substr (const char *str, char **rest,
+                         const char **error, const char **pstr)
 {
   int64_t size;
   char *end;
@@ -56,6 +59,8 @@ human_size_parse (const char *str,
   /* XXX Should we allow hex? If so, hex cannot use scaling suffixes,
    * because some of them are valid hex digits.
    */
+  if (rest)
+    *rest = NULL;
   errno = 0;
   size = strtoimax (str, &end, 10);
   if (str == end) {
@@ -77,7 +82,7 @@ human_size_parse (const char *str,
   switch (*end) {
     /* No suffix */
   case '\0':
-    end--; /* Safe, since we already filtered out empty string */
+    /* Safe, since we already filtered out empty string */
     break;

     /* Powers of 1024 */
@@ -100,6 +105,7 @@ human_size_parse (const char *str,
     scale *= 1024;
     /* fallthru */
   case 'b': case 'B':
+    end++;
     break;

     /* "sectors", ie. units of 512 bytes, even if that's not the real
@@ -107,22 +113,8 @@ human_size_parse (const char *str,
      */
   case 's': case 'S':
     scale = 512;
+    end++;
     break;
-
-  default:
-    *error = "could not parse size: unknown suffix";
-    *pstr = end;
-    return -1;
-  }
-
-  /* XXX Maybe we should support 'MiB' as a synonym for 'M'; and 'MB'
-   * for powers of 1000, for similarity to GNU tools. But for now,
-   * anything beyond 'M' is dropped.
-   */
-  if (end[1]) {
-    *error = "could not parse size: unknown suffix";
-    *pstr = end;
-    return -1;
   }

   if (INT64_MAX / scale < size) {
@@ -131,7 +123,34 @@ human_size_parse (const char *str,
     return -1;
   }

+  /* XXX Maybe we should support 'MiB' as a synonym for 'M'; and 'MB'
+   * for powers of 1000, for similarity to GNU tools. But for now,
+   * anything beyond 'M' is dropped.
+   */
+  if (rest)
+    *rest = end;
+  else if (*end) {
+    *error = "could not parse size: unknown suffix";
+    *pstr = end;
+    return -1;
+  }
+
   return size * scale;
 }

+/* Attempt to parse a string with a possible scaling suffix, such as
+ * "2M".  Disk sizes cannot usefully exceed off_t (which is signed)
+ * and cannot be negative.  str must not have any trailing garbage.
+ *
+ * On error, returns -1 and sets *error and *pstr.  You can form a
+ * final error message by appending "<error>: <pstr>".
+ */
+static inline int64_t
+human_size_parse (const char *str,
+                  const char **error, const char **pstr)
+{
+  return human_size_parse_substr (str, NULL, error, pstr);
+}
+
+
 #endif /* NBDKIT_HUMAN_SIZE_H */
diff --git a/common/include/test-human-size.c b/common/include/test-human-size.c
index 7f75bb4b..3e388e39 100644
--- a/common/include/test-human-size.c
+++ b/common/include/test-human-size.c
@@ -36,10 +36,11 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdint.h>
+#include <string.h>

 #include "array-size.h"
 #include "human-size.h"
-#include "human-size-test-cases.h" /* defines 'pairs' below */
+#include "human-size-test-cases.h" /* defines 'tuples' below */

 int
 main (void)
@@ -47,20 +48,59 @@ main (void)
   size_t i;
   bool pass = true;

-  for (i = 0; i < ARRAY_SIZE (pairs); i++) {
+  for (i = 0; i < ARRAY_SIZE (tuples); i++) {
     const char *error = NULL, *pstr = NULL;
+    char *rest = NULL;
     int64_t r;
+    int64_t expect;

-    r = human_size_parse (pairs[i].str, &error, &pstr);
-    if (r != pairs[i].res) {
+    r = human_size_parse (tuples[i].str, &error, &pstr);
+    expect = tuples[i].tail && *tuples[i].tail ? -1 : tuples[i].res;
+    if (r != expect) {
       fprintf (stderr,
                "Wrong parse for %s, got %" PRId64 ", expected %" PRId64 "\n",
-               pairs[i].str, r, pairs[i].res);
+               tuples[i].str, r, expect);
       pass = false;
     }
     if (r == -1) {
       if (error == NULL || pstr == NULL) {
-        fprintf (stderr, "Wrong error message handling for %s\n", 
pairs[i].str);
+        fprintf (stderr, "Wrong error message handling for %s\n",
+                 tuples[i].str);
+        pass = false;
+      }
+    }
+
+    r = human_size_parse_substr (tuples[i].str, &rest, &error, &pstr);
+    if (r != tuples[i].res) {
+      fprintf (stderr,
+               "Wrong parse for %s, got %" PRId64 ", expected %" PRId64 "\n",
+               tuples[i].str, r, tuples[i].res);
+      pass = false;
+    }
+    if (r == -1) {
+      if (error == NULL || pstr == NULL) {
+        fprintf (stderr, "Wrong error message handling for %s\n",
+                 tuples[i].str);
+        pass = false;
+      }
+      if (rest != NULL) {
+        fprintf (stderr,
+                 "Wrong suffix handling for %s, expected NULL, got '%s'\n",
+                 tuples[i].str, rest);
+        pass = false;
+      }
+    }
+    else {
+      if (rest == NULL) {
+        fprintf (stderr,
+                 "Wrong suffix handling for %s, expected '%s', got NULL\n",
+                 tuples[i].str, tuples[i].tail);
+        pass = false;
+      }
+      else if (strcmp (rest, tuples[i].tail) != 0) {
+        fprintf (stderr,
+                 "Wrong suffix handling for %s, expected '%s', got '%s'\n",
+                 tuples[i].str, tuples[i].tail, rest);
         pass = false;
       }
     }
diff --git a/server/test-public.c b/server/test-public.c
index 0eddd6ca..bb30134e 100644
--- a/server/test-public.c
+++ b/server/test-public.c
@@ -110,7 +110,7 @@ backend_default_export (struct backend *b, int readonly)

 /* Unit tests. */

-#include "human-size-test-cases.h" /* defines 'pairs' below */
+#include "human-size-test-cases.h" /* defines 'tuples' below */

 static bool
 test_nbdkit_parse_size (void)
@@ -118,19 +118,20 @@ test_nbdkit_parse_size (void)
   size_t i;
   bool pass = true;

-  for (i = 0; i < ARRAY_SIZE (pairs); i++) {
+  for (i = 0; i < ARRAY_SIZE (tuples); i++) {
     int64_t r;
+    int64_t expect = tuples[i].tail && *tuples[i].tail ? -1 : tuples[i].res;

     error_flagged = false;
-    r = nbdkit_parse_size (pairs[i].str);
-    if (r != pairs[i].res) {
+    r = nbdkit_parse_size (tuples[i].str);
+    if (r != expect) {
       fprintf (stderr,
                "Wrong parse for %s, got %" PRId64 ", expected %" PRId64 "\n",
-               pairs[i].str, r, pairs[i].res);
+               tuples[i].str, r, expect);
       pass = false;
     }
     if ((r == -1) != error_flagged) {
-      fprintf (stderr, "Wrong error message handling for %s\n", pairs[i].str);
+      fprintf (stderr, "Wrong error message handling for %s\n", tuples[i].str);
       pass = false;
     }
   }
-- 
2.49.0
_______________________________________________
Libguestfs mailing list -- guestfs@lists.libguestfs.org
To unsubscribe send an email to guestfs-le...@lists.libguestfs.org

[Libguestfs] [nbdkit PATCH 1/4] human-size: Add human_size_parse_substr

Reply via email to