Hi,
Ideally, join should be able to handle files sorted in any order
that sort provides, but at a bare minimum it should be able to
join files sorted on numeric fields.
The attached simple patch adds a -n (--numeric-sort) option
to this effect.
--
Alex
--- src/join.c~ 2010-04-21 04:52:04.000000000 +0900
+++ src/join.c 2010-05-27 15:04:40.898651732 +0900
@@ -145,6 +145,7 @@
static struct option const longopts[] =
{
{"ignore-case", no_argument, NULL, 'i'},
+ {"numeric-sort", no_argument, NULL, 'n'},
{"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
{"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
{"header", no_argument, NULL, HEADER_LINE_OPTION},
@@ -159,6 +160,9 @@
/* If nonzero, ignore case when comparing join fields. */
static bool ignore_case;
+/* If nonzero, treat keys as numeric values. */
+static bool numeric_sort;
+
/* If nonzero, treat the first line of each file as column headers -
join them without checking for ordering */
static bool join_header_lines;
@@ -186,6 +190,7 @@
"), stdout);
fputs (_("\
-i, --ignore-case ignore differences in case when comparing fields\n\
+ -n, --numeric-sort compare according to string numerical value\n\
-j FIELD equivalent to `-1 FIELD -2 FIELD'\n\
-o FORMAT obey FORMAT while constructing output line\n\
-t CHAR use CHAR as input and output field separator\n\
@@ -304,6 +309,7 @@
size_t len1;
size_t len2; /* Length of fields to compare. */
+ long double x1, x2;
int diff;
if (jf_1 < line1->nfields)
@@ -333,7 +339,13 @@
if (len2 == 0)
return 1;
- if (ignore_case)
+ if (numeric_sort)
+ {
+ x1 = strtold(beg1, NULL);
+ x2 = strtold(beg2, NULL);
+ diff = x1 - x2;
+ }
+ else if (ignore_case)
{
/* FIXME: ignore_case does not work with NLS (in particular,
with multibyte chars). */
@@ -971,7 +983,7 @@
issued_disorder_warning[0] = issued_disorder_warning[1] = false;
check_input_order = CHECK_ORDER_DEFAULT;
- while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
+ while ((optc = getopt_long (argc, argv, "-a:e:in1:2:j:o:t:v:",
longopts, NULL))
!= -1)
{
@@ -1008,6 +1020,10 @@
ignore_case = true;
break;
+ case 'n':
+ numeric_sort = true;
+ break;
+
case '1':
set_join_field (&join_field_1, string_to_join_field (optarg));
break;