I've tweaked the patch a bit to simplify some code and expect to push it soon.
Thanks to my friendly LUG, I confirmed that Solaris 9 and 10 behave as expected for these commands:

  printf "a a b\nz a a\n" | sort -k2,3.0
  printf "a y\na z\n" | sort -k1,1b

cheers,
Pádraig.
>From 4a1f5d98265cf74297d9e523aa99fca80cc51e3c Mon Sep 17 00:00:00 2001 From: =?utf-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com> Date: Tue, 24 Feb 2009 08:37:18 +0000 Subject: [PATCH] sort: Fix two bugs with determining the end of field * src/sort.c: When no specific number of chars to skip is specified for the end field, always skip the whole field. Also never include leading spaces from next field. * tests/misc/sort: Add 2 new tests for these cases. * NEWS: Mention this bug fix. * THANKS: Add bug reporter. Reported by Davide Canova --- NEWS | 6 ++++++ THANKS | 1 + src/sort.c | 38 +++++++++++++------------------------- tests/misc/sort | 6 ++++++ 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/NEWS b/NEWS index 82ded9d..05d22cb 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,12 @@ GNU coreutils NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** Bug fixes + + sort now handles specified key ends correctly. + Previously -k1,1b would have caused leading space from field 2 to be + included in the sort while -k2,3.0 would have not included field 3. + * Noteworthy changes in release 7.1 (2009-02-21) [stable] diff --git a/THANKS b/THANKS index 5c25321..4b35a37 100644 --- a/THANKS +++ b/THANKS @@ -137,6 +137,7 @@ David Godfrey d...@delta.demon.co.uk David Luyer david_lu...@pacific.net.au David Madore david.mad...@ens.fr David Malone dwmal...@cnri.dit.ie +Davide Canova kc.can...@gmail.com Dawson Engler eng...@stanford.edu Dean Gaudet dean-savan...@arctic.org Deepak Goel de...@gnufans.org diff --git a/src/sort.c b/src/sort.c index f438563..27726a5 100644 --- a/src/sort.c +++ b/src/sort.c @@ -1366,7 +1366,6 @@ begfield (const struct line *line, const struct keyfield *key) char *ptr = line->text, *lim = ptr + line->length - 1; size_t sword = key->sword; size_t schar = key->schar; - size_t remaining_bytes; /* The leading field separator itself is included in a field when -t is absent. 
*/ @@ -1392,12 +1391,7 @@ begfield (const struct line *line, const struct keyfield *key) while (ptr < lim && blanks[to_uchar (*ptr)]) ++ptr; - /* Advance PTR by SCHAR (if possible), but no further than LIM. */ - remaining_bytes = lim - ptr; - if (schar < remaining_bytes) - ptr += schar; - else - ptr = lim; + ptr = MIN (lim, ptr + schar); return ptr; } @@ -1410,7 +1404,9 @@ limfield (const struct line *line, const struct keyfield *key) { char *ptr = line->text, *lim = ptr + line->length - 1; size_t eword = key->eword, echar = key->echar; - size_t remaining_bytes; + + if (echar == 0) + eword++; /* Skip all of end field. */ /* Move PTR past EWORD fields or to one past the last byte on LINE, whichever comes first. If there are more than EWORD fields, leave @@ -1487,19 +1483,14 @@ limfield (const struct line *line, const struct keyfield *key) } #endif - /* If we're ignoring leading blanks when computing the End - of the field, don't start counting bytes until after skipping - past any leading blanks. */ - if (key->skipeblanks) - while (ptr < lim && blanks[to_uchar (*ptr)]) - ++ptr; + if (echar != 0) /* We need to skip over a portion of the end field. */ + { + if (key->skipeblanks) /* blanks not counted in echar. */ + while (ptr < lim && blanks[to_uchar (*ptr)]) + ++ptr; - /* Advance PTR by ECHAR (if possible), but no further than LIM. */ - remaining_bytes = lim - ptr; - if (echar < remaining_bytes) - ptr += echar; - else - ptr = lim; + ptr = MIN (lim, ptr + echar); + } return ptr; } @@ -3152,12 +3143,9 @@ main (int argc, char **argv) badfieldspec (optarg, N_("field number is zero")); } if (*s == '.') - s = parse_field_count (s + 1, &key->echar, - N_("invalid number after `.'")); - else { - /* `-k 2,3' is equivalent to `+1 -3'. 
*/ - key->eword++; + s = parse_field_count (s + 1, &key->echar, + N_("invalid number after `.'")); } s = set_ordering (s, key, bl_end); } diff --git a/tests/misc/sort b/tests/misc/sort index 3e8eda6..3af2388 100755 --- a/tests/misc/sort +++ b/tests/misc/sort @@ -110,6 +110,8 @@ my @Tests = ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}], ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], +# ensure a character position of 0 includes whole field +["07e", '-k 2,3.0', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}], # # report an error for `.' without following char spec ["08a", '-k 2.,3', {EXIT=>2}, @@ -210,6 +212,10 @@ my @Tests = # key start and key end. ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}], +# When ignoring leading blanks for end position, ensure blanks from +# next field are not included in the sort. I.E. order should not change here. +["18f", '-k1,1b', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}], + # This looks odd, but works properly -- 2nd keyspec is never # used because all lines are different. ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}], -- 1.5.3.6
_______________________________________________ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils