Hi,

Ideally, join should be able to handle files sorted in any order
that sort can produce, but at a bare minimum it should be able
to join files sorted on numeric fields.

The attached simple patch provides -n, --numeric-sort
options to this effect.

-- 
Alex
--- src/join.c~	2010-04-21 04:52:04.000000000 +0900
+++ src/join.c	2010-05-27 15:04:40.898651732 +0900
@@ -145,6 +145,7 @@
 static struct option const longopts[] =
 {
   {"ignore-case", no_argument, NULL, 'i'},
+  {"numeric-sort", no_argument, NULL, 'n'},
   {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
   {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
   {"header", no_argument, NULL, HEADER_LINE_OPTION},
@@ -159,6 +160,9 @@
 /* If nonzero, ignore case when comparing join fields.  */
 static bool ignore_case;
 
+/* If nonzero, treat keys as numeric values.  */
+static bool numeric_sort;
+
 /* If nonzero, treat the first line of each file as column headers -
    join them without checking for ordering */
 static bool join_header_lines;
@@ -186,6 +190,7 @@
 "), stdout);
       fputs (_("\
   -i, --ignore-case  ignore differences in case when comparing fields\n\
+  -n, --numeric-sort compare according to string numerical value\n\
   -j FIELD          equivalent to `-1 FIELD -2 FIELD'\n\
   -o FORMAT         obey FORMAT while constructing output line\n\
   -t CHAR           use CHAR as input and output field separator\n\
@@ -304,6 +309,7 @@
 
   size_t len1;
   size_t len2;		/* Length of fields to compare.  */
+  long double x1, x2;
   int diff;
 
   if (jf_1 < line1->nfields)
@@ -333,7 +339,13 @@
   if (len2 == 0)
     return 1;
 
-  if (ignore_case)
+  if (numeric_sort)
+    {
+      x1 = strtold(beg1, NULL);
+      x2 = strtold(beg2, NULL);
+      diff = x1 - x2;
+    }
+  else if (ignore_case)
     {
       /* FIXME: ignore_case does not work with NLS (in particular,
          with multibyte chars).  */
@@ -971,7 +983,7 @@
   issued_disorder_warning[0] = issued_disorder_warning[1] = false;
   check_input_order = CHECK_ORDER_DEFAULT;
 
-  while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
+  while ((optc = getopt_long (argc, argv, "-a:e:in1:2:j:o:t:v:",
                               longopts, NULL))
          != -1)
     {
@@ -1008,6 +1020,10 @@
           ignore_case = true;
           break;
 
+        case 'n':
+          numeric_sort = true;
+          break;
+
         case '1':
           set_join_field (&join_field_1, string_to_join_field (optarg));
           break;

Reply via email to