On Tue, Nov 7, 2017 at 1:36 PM, Jim Meyering <j...@meyering.net> wrote:
> On Tue, Oct 31, 2017 at 12:03 PM, Curt McDowell <c...@purestorage.com> wrote:
>> Hi,
>>
>> I'm running idutils 4.6 on Ubuntu 14.04 to index a large source base that is
>> on an NFS-mounted filesystem that uses 64-bit inodes. mkid incorrectly
>> issues many warnings such as the following:
>>
>> /home/csm/src/idutils-4.6/src/mkid: warning:
>> `/df-csm/ir-csm7/platform/dot/ir/component/si5338/si5338.py' and
>> `/df-csm/ir-csm7/hardware/perf/CTRL_systemC/src/demux.hpp' are the same
>> file, but yield different scans!
>>
>> It turns out these inodes are equal in the lower 32 bits, but are not equal.
>>
>> % ls -li /df-csm/ir-csm7/platform/dot/ir/component/si5338/si5338.py
>> /df-csm/ir-csm7/hardware/perf/CTRL_systemC/src/demux.hpp
>> 33776997256654722 -rwxr-xr-x 1 csm staff  3290 May 22 22:03
>> /df-csm/ir-csm7/hardware/perf/CTRL_systemC/src/demux.hpp
>> 63050394834562946 -rw-r--r-- 1 csm staff 28973 May 22 22:03
>> /df-csm/ir-csm7/platform/dot/ir/component/si5338/si5338.py
>>
>> The inode numbers in hex are 780000030FEF82 and E00000030FEF82,
>> respectively.
>>
>> I believe the bug is that the inode hash functions should account for the
>> size of the di_ino field in case it is 8 bytes wide, rather than assuming
>> they are always 4 bytes:
>>
>> /****************************************************************************/
>> /* Hash stuff for `struct dev_ino'.  */
>>
>> static unsigned long
>> dev_ino_hash_1 (void const *key)
>> {
>>   unsigned long result = 0;
>>   INTEGER_HASH_1 (((struct dev_ino const *) key)->di_dev, result);
>>   INTEGER_HASH_1 (((struct dev_ino const *) key)->di_ino, result);
>>   return result;
>> }
>>
>> static unsigned long
>> dev_ino_hash_2 (void const *key)
>> {
>>   unsigned long result = 0;
>>   INTEGER_HASH_2 (((struct dev_ino const *) key)->di_dev, result);
>>   INTEGER_HASH_2 (((struct dev_ino const *) key)->di_ino, result);
>>   return result;
>> }
>>
>> As an unrelated issue, in order to get idutils 4.6 to compile on Ubuntu
>> 14.0.4 I had to edit lib/stdio.h and change "#if 1" to "#if 0" for the
>> section that deals with the "gets" function (libc 2.19-0ubuntu6.13).
>
> Thank you for the report.
> That is definitely a bug -- and it is over two decades old!
> I will fix this shortly, and will soon make a test release.

It wasn't as prompt as I would have liked, but here's a patch.
I'll make a snapshot today and post separately to the bug-idutils list.
From 6cdfde50d3dd81e241cd6270057ac0757464f0e0 Mon Sep 17 00:00:00 2001
From: Jim Meyering <meyer...@fb.com>
Date: Tue, 7 Nov 2017 20:09:04 -0800
Subject: [PATCH] fix handling of 64-bit ino_t

The dev/inode hashing code used only four bytes of each inode number,
and thus could malfunction:
  mkid: warning: `f1' and `f1' are the same file, but yield different scans!
* libidu/walker.c (dev_ino_hash_1, dev_ino_hash_2): Adjust to use all bytes
of device and inode numbers.
* NEWS (Bugs): Mention it.
Reported by Curt McDowell in https://bugs.gnu.org/29092
---
 NEWS            |  4 ++++
 libidu/walker.c | 45 +++++++++++++++++++++++++++++----------------
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/NEWS b/NEWS
index 81d19b1..5a8e979 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,10 @@ GNU idutils NEWS                                      -*- 
outline -*-

   lid is no longer susceptible to a buffer overrun

+  mkid no longer mistakenly warns that different files are the same but
+  with "different scans" for files with colliding 64-bit inode numbers
+  [the bug dates back to the initial implementation]
+

 * Noteworthy changes in release 4.6 (2012-02-03) [stable]

diff --git a/libidu/walker.c b/libidu/walker.c
index e332305..82a2392 100644
--- a/libidu/walker.c
+++ b/libidu/walker.c
@@ -1107,23 +1107,36 @@ links_depth (struct file_link const *flink)
 /****************************************************************************/
 /* Hash stuff for `struct dev_ino'.  */

-static unsigned long
-dev_ino_hash_1 (void const *key)
-{
-  unsigned long result = 0;
-  INTEGER_HASH_1 (((struct dev_ino const *) key)->di_dev, result);
-  INTEGER_HASH_1 (((struct dev_ino const *) key)->di_ino, result);
-  return result;
-}
-
-static unsigned long
-dev_ino_hash_2 (void const *key)
-{
-  unsigned long result = 0;
-  INTEGER_HASH_2 (((struct dev_ino const *) key)->di_dev, result);
-  INTEGER_HASH_2 (((struct dev_ino const *) key)->di_ino, result);
-  return result;
+/* FIXME: consider dumping all of this in favor of gnulib's di-set module.  */
+/* Expand to the definition of a function that hashes a dev+inode pair,
+   with application of XFORM.  */
+#define DEV_INO_HASH_DEFUN(FN_NAME, XFORM)                             \
+static size_t _GL_ATTRIBUTE_PURE                                       \
+FN_NAME (void const *x)                                                        
\
+{                                                                      \
+  struct dev_ino const *p = x;                                         \
+  ino_t ino = p->di_ino;                                               \
+                                                                       \
+  /* When INO is wider than size_t, XOR the words of INO into H.       \
+     This avoids loss of info, without applying % to the wider type,   \
+     which could be quite slow on some systems.  */                    \
+  size_t h = XFORM (ino);                                              \
+  unsigned int n_words = sizeof ino / sizeof h + (sizeof ino % sizeof h != 0); 
\
+  for (unsigned int i = 1; i < n_words; i++)                           \
+    h ^= XFORM (ino >> CHAR_BIT * sizeof h * i);                       \
+                                                                       \
+  dev_t dev = p->di_dev;                                               \
+  h ^= XFORM (dev);                                                    \
+  n_words = sizeof dev / sizeof h + (sizeof dev % sizeof h != 0);      \
+  for (unsigned int i = 1; i < n_words; i++)                           \
+    h ^= XFORM (dev >> CHAR_BIT * sizeof h * i);                       \
+                                                                       \
+  return h;                                                            \
 }
+static inline size_t xform_NOP (size_t k) { return k; }
+static inline size_t xform_NOT (size_t k) { return ~k; }
+DEV_INO_HASH_DEFUN(dev_ino_hash_1, xform_NOP)
+DEV_INO_HASH_DEFUN(dev_ino_hash_2, xform_NOT)

 static int
 dev_ino_hash_compare (void const *x, void const *y)
-- 
2.15.1.354.g95ec6b1b3

_______________________________________________
bug-idutils mailing list
bug-idutils@gnu.org
https://lists.gnu.org/mailman/listinfo/bug-idutils

Reply via email to