On 02/02/2014 01:20 AM, Pádraig Brady wrote:
> On 01/31/2014 09:44 AM, Niels Möller wrote:
>> ni...@lysator.liu.se (Niels Möller) writes:
>>
>>> Pádraig Brady <p...@draigbrady.com> writes:
>>>> I agree this would be useful and easy enough to add.
>>>> I suppose the interface would be --endian=little|big
>>>
>>> Maybe I can have a look at what it takes.
>>
>> Below is a crude patch (missing: usage message, test cases, docs,
>> translation). I think it should work fine for floats too. I see no
>> obvious and more beautiful way to do it. 
>>
>> (And I think I have copyright assignment papers for coreutils in place,
>> since my work on factor some years ago).
>>
>> Regards,
>> /Niels
>>
>> diff --git a/src/od.c b/src/od.c
>> index 514fe50..a71e302 100644
>> --- a/src/od.c
>> +++ b/src/od.c
>> @@ -259,13 +259,16 @@ static enum size_spec 
>> integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1];
>>  #define MAX_FP_TYPE_SIZE sizeof (long double)
>>  static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1];
>>  
>> +bool input_swap;
>> +
>>  static char const short_options[] = "A:aBbcDdeFfHhIij:LlN:OoS:st:vw::Xx";
>>  
>>  /* For long options that have no equivalent short option, use a
>>     non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
>>  enum
>>  {
>> -  TRADITIONAL_OPTION = CHAR_MAX + 1
>> +  TRADITIONAL_OPTION = CHAR_MAX + 1,
>> +  ENDIAN_OPTION,
>>  };
>>  
>>  static struct option const long_options[] =
>> @@ -278,6 +281,7 @@ static struct option const long_options[] =
>>    {"strings", optional_argument, NULL, 'S'},
>>    {"traditional", no_argument, NULL, TRADITIONAL_OPTION},
>>    {"width", optional_argument, NULL, 'w'},
>> +  {"endian", required_argument, NULL, ENDIAN_OPTION },
>>  
>>    {GETOPT_HELP_OPTION_DECL},
>>    {GETOPT_VERSION_OPTION_DECL},
>> @@ -406,7 +410,21 @@ N (size_t fields, size_t blank, void const *block,      
>>                 \
>>      {                                                                   \
>>        int next_pad = pad * (i - 1) / fields;                            \
>>        int adjusted_width = pad_remaining - next_pad + width;            \
>> -      T x = *p++;                                                       \
>> +      T x;                                                              \
>> +      if (input_swap && sizeof(T) > 1)                                  \
>> +        {                                                               \
>> +          int j;                                                        \
>> +          union {                                                       \
>> +            T x;                                                        \
>> +            char b[sizeof(T)];                                          \
>> +          } u;                                                          \
>> +          for (j = 0; j < sizeof(T); j++)                               \
>> +            u.b[j] = ((const char *) p)[sizeof(T) - 1 - j];             \
>> +          x = u.x;                                                      \
>> +        }                                                               \
>> +      else                                                              \
>> +        x = *p;                                                         \
>> +      p++;                                                              \
>>        ACTION;                                                           \
>>        pad_remaining = next_pad;                                         \
>>      }                                                                   \
>> @@ -1664,6 +1682,24 @@ main (int argc, char **argv)
>>            traditional = true;
>>            break;
>>  
>> +        case ENDIAN_OPTION:
>> +          if (!strcmp (optarg, "big"))
>> +            {
>> +#if !WORDS_BIGENDIAN
>> +              input_swap = true;
>> +#endif
>> +            }
>> +          else if (!strcmp (optarg, "little"))
>> +            {
>> +#if WORDS_BIGENDIAN
>> +                input_swap = true;
>> +#endif
>> +            }
>> +          else
>> +            error (EXIT_FAILURE, 0,
>> +                   _("bad argument '%s' for --endian option"), optarg);
>> +          break;
>> +
>>            /* The next several cases map the traditional format
>>               specification options to the corresponding modern format
>>               specs.  GNU od accepts any combination of old- and
> 
> That looks good.
> I'll adjust slightly to use XARGMATCH and add some docs/tests.
> I'm travelling at the moment but will merge this soon.

Attached is the patch I intend to push in your name.

I changed the option handling to reuse the XARGMATCH functionality.
Also I changed things slightly so that the last --endian option
specified wins. Previously we only set the input_swap variable
to true, never to false. On a related point I set the input_swap
global to be static.

I also added docs to usage() and the texinfo file, and added a test.

BTW I checked if there was any speed difference with the new code.
I wasn't expecting this to be a bottleneck, and true enough
there is only a marginal change. The new code is consistently
a little _faster_ though on my i3-2310M which is a bit surprising.

 $ truncate -s1G od.in
 $ time od.old -tx8 od.in
 5.05 elapsed
 $ time od.new -tx8 --endian=big od.in
 4.97 elapsed

My hunch is there is more prefetching happening in the new version,
but can't check on this system due to:

  $ perf stat -e L1-dcache-prefetches:u true
      <not supported> L1-dcache-prefetches:u

For kicks I put in bswap_{16,32,64}() calls, which gnulib guarantees
are available but which are replaced with architecture-specific asm
on this system, and the speed regressed back to that of od.old.

thanks,
Pádraig.
>From 9069a82ce4b1411a2f56c8bf458a8d8d74e3a7ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <ni...@lysator.liu.se>
Date: Fri, 31 Jan 2014 17:39:52 +0100
Subject: [PATCH] od: add an --endian option to control byte swapping

* src/od.c (main): Handle the new --endian option,
taking "little" and "big" as parameters.
(usage): Describe the new option.
(PRINT_FIELDS): Adjust to swap bytes if required.
* tests/misc/od-endian.sh: A new test to verify
the byte swapping operations for hex (ints) and floats
for all sizes between 1 and 16 inclusive.
* tests/local.mk: Reference the new test.
* doc/coreutils.texi (od invocation): Describe the new option.
* NEWS: Mention the new feature.
---
 NEWS                    |    5 ++++
 doc/coreutils.texi      |   10 ++++++++
 src/od.c                |   59 ++++++++++++++++++++++++++++++++++++++++++++--
 tests/local.mk          |    1 +
 tests/misc/od-endian.sh |   38 ++++++++++++++++++++++++++++++
 5 files changed, 110 insertions(+), 3 deletions(-)
 create mode 100755 tests/misc/od-endian.sh

diff --git a/NEWS b/NEWS
index e091d18..0da82ac 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,11 @@ GNU coreutils NEWS                                    -*- outline -*-
   it would display an error, requiring --no-dereference to avoid the issue.
   [bug introduced in coreutils-5.3.0]
 
+** New features
+
+  od accepts a new option: --endian=TYPE to handle inputs with different byte
+  orders, or to provide consistent output on systems with disparate endianness.
+
 ** Improvements
 
   stat and tail work better with HFS+ and HFSX.  stat -f --format=%T now reports
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 35e7bd9..3269291 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -1868,6 +1868,16 @@ none (do not print offsets).
 
 The default is octal.
 
+@item --endian=@var{order}
+@opindex --endian
+@cindex byte-swapping
+@cindex endianness
+Reorder input bytes, to handle inputs with differing byte orders,
+or to provide consistent output independent of the endian convention
+of the current system.  Swapping is performed according to the
+specified @option{--type} size and endian @var{order}, which can be
+@samp{little} or @samp{big}.
+
 @item -j @var{bytes}
 @itemx --skip-bytes=@var{bytes}
 @opindex -j
diff --git a/src/od.c b/src/od.c
index 514fe50..2784ea7 100644
--- a/src/od.c
+++ b/src/od.c
@@ -23,6 +23,7 @@
 #include <getopt.h>
 #include <sys/types.h>
 #include "system.h"
+#include "argmatch.h"
 #include "error.h"
 #include "ftoastr.h"
 #include "quote.h"
@@ -259,13 +260,37 @@ static enum size_spec integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1];
 #define MAX_FP_TYPE_SIZE sizeof (long double)
 static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1];
 
+#ifndef WORDS_BIGENDIAN
+# define WORDS_BIGENDIAN 0
+#endif
+
+/* Use native endianness by default.  */
+static bool input_swap;
+
 static char const short_options[] = "A:aBbcDdeFfHhIij:LlN:OoS:st:vw::Xx";
 
 /* For long options that have no equivalent short option, use a
    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 enum
 {
-  TRADITIONAL_OPTION = CHAR_MAX + 1
+  TRADITIONAL_OPTION = CHAR_MAX + 1,
+  ENDIAN_OPTION,
+};
+
+enum endian_type
+{
+  endian_little,
+  endian_big
+};
+
+static char const *const endian_args[] =
+{
+  "little", "big", NULL
+};
+
+static enum endian_type const endian_types[] =
+{
+  endian_little, endian_big
 };
 
 static struct option const long_options[] =
@@ -278,6 +303,7 @@ static struct option const long_options[] =
   {"strings", optional_argument, NULL, 'S'},
   {"traditional", no_argument, NULL, TRADITIONAL_OPTION},
   {"width", optional_argument, NULL, 'w'},
+  {"endian", required_argument, NULL, ENDIAN_OPTION },
 
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
@@ -318,6 +344,7 @@ suffixes may be . for octal and b for multiply by 512.\n\
       fputs (_("\
   -A, --address-radix=RADIX   output format for file offsets; RADIX is one\n\
                                 of [doxn], for Decimal, Octal, Hex or None\n\
+      --endian={big|little}   swap input bytes according the specified order\n\
   -j, --skip-bytes=BYTES      skip BYTES input bytes first\n\
 "), stdout);
       fputs (_("\
@@ -400,13 +427,27 @@ N (size_t fields, size_t blank, void const *block,                      \
    char const *FMT_STRING, int width, int pad)                          \
 {                                                                       \
   T const *p = block;                                                   \
-  uintmax_t i;                                                             \
+  uintmax_t i;                                                          \
   int pad_remaining = pad;                                              \
   for (i = fields; blank < i; i--)                                      \
     {                                                                   \
       int next_pad = pad * (i - 1) / fields;                            \
       int adjusted_width = pad_remaining - next_pad + width;            \
-      T x = *p++;                                                       \
+      T x;                                                              \
+      if (input_swap && sizeof (T) > 1)                                 \
+        {                                                               \
+          int j;                                                        \
+          union {                                                       \
+            T x;                                                        \
+            char b[sizeof (T)];                                         \
+          } u;                                                          \
+          for (j = 0; j < sizeof (T); j++)                              \
+            u.b[j] = ((const char *) p)[sizeof (T) - 1 - j];            \
+          x = u.x;                                                      \
+        }                                                               \
+      else                                                              \
+        x = *p;                                                         \
+      p++;                                                              \
       ACTION;                                                           \
       pad_remaining = next_pad;                                         \
     }                                                                   \
@@ -1664,6 +1705,18 @@ main (int argc, char **argv)
           traditional = true;
           break;
 
+        case ENDIAN_OPTION:
+          switch (XARGMATCH ("--endian", optarg, endian_args, endian_types))
+            {
+              case endian_big:
+                  input_swap = ! WORDS_BIGENDIAN;
+                  break;
+              case endian_little:
+                  input_swap = WORDS_BIGENDIAN;
+                  break;
+            }
+          break;
+
           /* The next several cases map the traditional format
              specification options to the corresponding modern format
              specs.  GNU od accepts any combination of old- and
diff --git a/tests/local.mk b/tests/local.mk
index 9d556f6..815dc6f 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -239,6 +239,7 @@ all_tests =					\
   tests/misc/xstrtol.pl				\
   tests/tail-2/pid.sh				\
   tests/misc/od.pl				\
+  tests/misc/od-endian.sh			\
   tests/misc/od-float.sh			\
   tests/misc/mktemp.pl				\
   tests/misc/arch.sh				\
diff --git a/tests/misc/od-endian.sh b/tests/misc/od-endian.sh
new file mode 100755
index 0000000..79dbc84
--- /dev/null
+++ b/tests/misc/od-endian.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# verify that od --endian works properly
+
+# Copyright (C) 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ od
+
+in='0123456789abcdef'
+
+in_swapped() { printf '%s' "$in" | sed "s/.\{$1\}/&\\n/g" | rev | tr -d '\n'; }
+
+for e in little big; do
+  test $e = little && eo=big || eo=little
+  for s in 1 2 4 8 16; do
+    for t in x; do
+      od -t $t$s --endian=$e /dev/null > /dev/null 2>&1 || continue
+      printf '%s' "$in" | od -An -t $t$s --endian=$e  > out1
+      in_swapped  "$s"  | od -An -t $t$s --endian=$eo > out2
+      compare out1 out2 || fail=1
+    done
+  done
+done
+
+Exit $fail
-- 
1.7.7.6

Reply via email to