commit 98a12cc14637306f8ed9522643229c889cff038e
Author: Jakob Kramer <[email protected]>
Date:   Sat Apr 12 17:53:10 2014 +0200

    sort: add -k, fix -u
    
    Options that are specific to a single key definition are not
    supported (e.g. "sort -k 2,3n -k 4,4").  If such a definition is
    given, sort exits with EXIT_FAILURE and an error message.  All key
    definitions use the global settings exclusively.
    
    sort always behaves as if -b were set.

diff --git a/sort.1 b/sort.1
index 7913357..899efe2 100644
--- a/sort.1
+++ b/sort.1
@@ -4,6 +4,9 @@ sort \- sort lines
 .SH SYNOPSIS
 .B sort
 .RB [ \-nru ]
+.RB [ \-k
+.I key
+.RB ]...
 .RI [ file ...]
 .SH DESCRIPTION
 .B sort
@@ -18,4 +21,18 @@ perform a numeric sort.
 reverses the sort.
 .TP
 .B \-u
-prints repeated lines only once.
+prints equal lines only once.
+.TP
+.B \-k key
+specifies a key definition of the form \fBS\fR[.\fBs\fR][,\fBE\fR[.\fBe\fR]],
+where
+.B S,
+.B s,
+.B E,
+and
+.B e
+are the starting column, starting character in that column, ending column and
+the ending character of that column respectively.  If they are not specified,
+s refers to the first character of the specified starting column, E refers to
+the last column of every line, and e refers to the last character of that last
+column.
diff --git a/sort.c b/sort.c
index 348e16b..d86696c 100644
--- a/sort.c
+++ b/sort.c
@@ -1,4 +1,5 @@
 /* See LICENSE file for copyright and license details. */
+#include <ctype.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -7,7 +8,28 @@
 #include "text.h"
 #include "util.h"
 
+struct keydef {
+       unsigned start_column;
+       unsigned end_column;
+       unsigned start_char;
+       unsigned end_char;
+};
+
+struct kdlist {
+       struct keydef keydef;
+       struct kdlist *next;
+};
+
+static struct kdlist *head = NULL;
+static struct kdlist *curr = NULL;
+
+static void addkeydef(char *);
+static void freelist(void);
 static int linecmp(const char **, const char **);
+static char *next_nonblank(char *);
+static char *next_blank(char *);
+static int parse_keydef(struct keydef *, char *);
+static char *columns(char *, const struct keydef *);
 
 static bool rflag = false;
 static bool uflag = false;
@@ -18,7 +40,7 @@ static struct linebuf linebuf = EMPTY_LINEBUF;
 static void
 usage(void)
 {
-       eprintf("usage: %s [-nru] [file...]\n", argv0);
+       enprintf(2, "usage: %s [-nru] [-k def]... [file...]\n", argv0);
 }
 
 int
@@ -37,15 +59,20 @@ main(int argc, char *argv[])
        case 'u':
                uflag = true;
                break;
+       case 'k':
+               addkeydef(EARGF(usage()));
+               break;
        default:
                usage();
        } ARGEND;
 
+       addkeydef("1");
+
        if(argc == 0) {
                getlines(stdin, &linebuf);
        } else for(; argc > 0; argc--, argv++) {
                if(!(fp = fopen(argv[0], "r"))) {
-                       weprintf("fopen %s:", argv[0]);
+                       enprintf(2, "fopen %s:", argv[0]);
                        continue;
                }
                getlines(fp, &linebuf);
@@ -55,24 +82,144 @@ main(int argc, char *argv[])
                        (int (*)(const void *, const void *))linecmp);
 
        for(i = 0; i < linebuf.nlines; i++) {
-               if(!uflag || i == 0 || strcmp(linebuf.lines[i],
-                                       linebuf.lines[i-1]) != 0) {
+               if(!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i],
+                                       (const char **)&linebuf.lines[i-1])) {
                        fputs(linebuf.lines[i], stdout);
                }
        }
 
+       freelist();
        return EXIT_SUCCESS;
 }
 
-int
+static void
+addkeydef(char *def)
+{
+       struct kdlist *node;
+
+       node = malloc(sizeof(*node));
+       if(!node)
+               enprintf(2, "malloc:");
+       if(!head)
+               head = node;
+       if(parse_keydef(&node->keydef, def))
+               enprintf(2, "parse_keydef:");
+       if(curr)
+               curr->next = node;
+       node->next = NULL;
+       curr = node;
+}
+
+static void
+freelist(void)
+{
+       struct kdlist *node;
+       struct kdlist *tmp;
+
+       for(node = head; node; node = tmp) {
+               tmp = node->next;
+               free(node);
+       }
+}
+
+static int
 linecmp(const char **a, const char **b)
 {
-       if (nflag) {
-               if (rflag)
-                       return strtoul(*b, 0, 10) - strtoul(*a, 0, 10);
+       char *s1, *s2;
+       int res = 0;
+       struct kdlist *node;
+
+       for(node = head; node && res == 0; node = node->next) {
+               s1 = columns((char *)*a, &node->keydef);
+               s2 = columns((char *)*b, &node->keydef);
+
+               /* don't consider modifiers if it's the default key
+                * definition that was implicitly added */
+               /* if -u is given, don't use default */
+               if(uflag && !(node == head) && !node->next)
+                       res = 0;
+               else if(!(node == head) && !node->next)
+                       res = strcmp(s1, s2);
+               else if(nflag)
+                       res = strtoul(s1, 0, 10) - strtoul(s2, 0, 10);
                else
-                       return strtoul(*a, 0, 10) - strtoul(*b, 0, 10);
+                       res = strcmp(s1, s2);
+
+               free(s1);
+               free(s2);
+       }
+       return rflag ? -res : res;
+}
+
+static int
+parse_keydef(struct keydef *kd, char *s)
+{
+       char *rest = s;
+       kd->start_column = 1;
+       kd->start_char = 1;
+       /* 0 means end of line */
+       kd->end_column = 0;
+       kd->end_char = 0;
+
+       kd->start_column = strtoul(rest, &rest, 10);
+       if(!kd->start_column)
+               enprintf(2, "starting column cannot be 0\n");
+       if(*rest == '.')
+               kd->start_char = strtoul(rest+1, &rest, 10);
+       if(*rest == ',') {
+               kd->end_column = strtoul(rest+1, &rest, 10);
+               if(kd->end_column < kd->start_column)
+                       enprintf(2, ",%u is too small\n", kd->end_column);
        }
-       return strcmp(*a, *b) * (rflag ? -1 : +1);
+       if(*rest == '.')
+               kd->end_char = strtoul(rest+1, &rest, 10);
+       if(*rest != '

Reply via email to