Hi Pádraig,

I finally found time to work on this patch again. I have provided it as
an attachment.

I'm not completely satisfied with the documentation part. I tried to
be more specific, but it quickly becomes complicated. So I went back to
the original explanation, which does not completely satisfy me since it
does not explain the following points:
- When -a is not specified:
  - output file names are considered exhausted when the first suffix
    character would become 'z';
  - 'z[a-z]' suffixes are never used;
  - the suffix length is increased each time the suffixes are
    "exhausted".
But maybe it's not relevant to be so specific.

What is your opinion on this point?

> Good. That's what I'd prefer anyway so as to be compatible with old
> data sets. Note '.' sorts before digits (-d) too, so there should be
> no ordering issues with --additional-suffix=... either.
In fact, it depends on the current locale. With my locale, "." is not
sorted this way.

Cheers,

Jérémy
From 9060e4b80a8b038b2de7f68e07f306f8d98f18d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Compostella?= <[email protected]>
Date: Thu, 1 Mar 2012 20:37:41 +0100
Subject: [PATCH] split: add unlimited split files as default behavior

* src/split.c (next_file_name): If `suffix_auto' is true and the first
suffix character is 'z', generate a new file name adding `z' to
the prefix and increasing the suffix length by one.
(main): If `-a' is specified, disable unlimited split files feature.
* doc/coreutils.texi (split invocation): Mention it.
* NEWS (Improvements): Mention it.
---
 NEWS               |    2 ++
 doc/coreutils.texi |    8 +++++---
 src/split.c        |   41 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index 8006669..e0f5d6c 100644
--- a/NEWS
+++ b/NEWS
@@ -37,6 +37,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   systems for which getfilecon-, ACL-check- and XATTR-check-induced syscalls
   fail with ENOTSUP or similar.
 
+  split now supports an unlimited number of split files by default.
+
 
 * Noteworthy changes in release 8.15 (2012-01-06) [stable]
 
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 4a4cadb..2ab61a4 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -2990,9 +2990,11 @@ The output files' names consist of @var{prefix} (@samp{x} by default)
 followed by a group of characters (@samp{aa}, @samp{ab}, @dots{} by
 default), such that concatenating the output files in traditional
 sorted order by file name produces the original input file (except
-@option{-nr/@var{n}}).  If the output file names are exhausted,
-@command{split} reports an error without deleting the output files
-that it did create.
+@option{-nr/@var{n}}).  If more than 676 output files are required and
+@option{-a} is not specified, @command{split} uses @samp{zaaa},
+@samp{zaab}, etc.  If @option{-a} is specified and the output file
+names are exhausted, @command{split} reports an error without deleting
+the output files that it did create.
 
 The program accepts the following options.  Also see @ref{Common options}.
 
diff --git a/src/split.c b/src/split.c
index 68c9a34..34ba092 100644
--- a/src/split.c
+++ b/src/split.c
@@ -74,6 +74,9 @@ static char *outfile;
    Suffixes are inserted here.  */
 static char *outfile_mid;
 
+/* Generate new suffix when suffixes are exhausted.  */
+static bool suffix_auto = true;
+
 /* Length of OUTFILE's suffix.  */
 static size_t suffix_length;
 
@@ -242,14 +245,18 @@ next_file_name (void)
 {
   /* Index in suffix_alphabet of each character in the suffix.  */
   static size_t *sufindex;
+  static size_t outbase_length;
+  static size_t outfile_length;
+  static size_t addsuf_length;
 
   if (! outfile)
     {
       /* Allocate and initialize the first file name.  */
 
-      size_t outbase_length = strlen (outbase);
-      size_t addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
-      size_t outfile_length = outbase_length + suffix_length + addsuf_length;
+      outbase_length = strlen (outbase);
+      addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
+      outfile_length = outbase_length + suffix_length + addsuf_length;
+
       if (outfile_length + 1 < outbase_length)
         xalloc_die ();
       outfile = xmalloc (outfile_length + 1);
@@ -295,13 +302,38 @@ next_file_name (void)
       while (i-- != 0)
         {
           sufindex[i]++;
+          if (suffix_auto && i == 0 && !suffix_alphabet[sufindex[i] + 1])
+            break;
           outfile_mid[i] = suffix_alphabet[sufindex[i]];
           if (outfile_mid[i])
             return;
           sufindex[i] = 0;
           outfile_mid[i] = suffix_alphabet[sufindex[i]];
         }
-      error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
+
+      if (!suffix_auto)
+        error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
+
+      /* Otherwise, initialize a new first file name.  */
+
+      outfile_length += 2;
+      outfile = xrealloc (outfile, outfile_length + 1);
+      outfile[outfile_length] = 0;
+
+      /* Append the last alphabet character to the file name
+         prefix.  */
+      outfile[outbase_length] = suffix_alphabet[sufindex[i]];
+      outbase_length++;
+      outfile_mid = outfile + outbase_length;
+
+      /* Extend the suffix length by one.  */
+      suffix_length++;
+      sufindex = xrealloc (sufindex, suffix_length * sizeof *sufindex);
+      memset (sufindex, 0, suffix_length * sizeof *sufindex);
+      memset (outfile_mid, suffix_alphabet[0], suffix_length);
+
+      if (additional_suffix)
+        memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
     }
 }
 
@@ -1059,6 +1091,7 @@ main (int argc, char **argv)
                 usage (EXIT_FAILURE);
               }
             suffix_length = tmp;
+            suffix_auto = false;
           }
           break;
 
-- 
1.7.2.5

Reply via email to