On Mon, May 22, 2023 at 3:13 PM Albert Esteve <aest...@redhat.com> wrote:
> > > > On Sat, May 20, 2023 at 9:36 AM Marc-André Lureau < > marcandre.lur...@gmail.com> wrote: > >> Hi >> >> On Thu, May 18, 2023 at 4:03 PM Albert Esteve <aest...@redhat.com> wrote: >> >>> Add hash and an equal function to uuid module. >>> >>> Add a couple simple unit tests for new functions, >>> checking collisions for similar UUIDs in the case >>> of the hash function, and comparing generated UUIDs >>> for the equal function. >>> >>> Signed-off-by: Albert Esteve <aest...@redhat.com> >>> --- >>> include/qemu/uuid.h | 4 ++++ >>> tests/unit/test-uuid.c | 46 ++++++++++++++++++++++++++++++++++++++++++ >>> util/uuid.c | 38 ++++++++++++++++++++++++++++++++++ >>> 3 files changed, 88 insertions(+) >>> >>> diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h >>> index dc40ee1fc9..136df682c9 100644 >>> --- a/include/qemu/uuid.h >>> +++ b/include/qemu/uuid.h >>> @@ -96,4 +96,8 @@ int qemu_uuid_parse(const char *str, QemuUUID *uuid); >>> >>> QemuUUID qemu_uuid_bswap(QemuUUID uuid); >>> >>> +uint32_t qemu_uuid_hash(const void *uuid); >>> + >>> +int qemu_uuid_equal(const void *lhv, const void *rhv); >>> >> >> There is already qemu_uuid_is_equal() >> >> > > Agh, true. I'll remove it. Not sure why my brain ignored it as I was > reading the code... > One thing to consider here is that the function signature, if we want to pass it as a parameter for g_hash_table_new, expects void pointers whereas qemu_uuid_is_equal() takes QemuUUID pointers. How would you suggest proceeding here? Would be better to "overload" (or wrap) a call to qemu_uuid_equal() from another function that matches the expected signature? (int qemu_uuid_is_equal(void*, void*) is not a good name in that case, it should be something that highlights the difference between the two in the name) Or should I change the current existing function signature? > >> + >>> #endif >>> diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c >>> index c111de5fc1..8c865869d5 100644 >>> --- a/tests/unit/test-uuid.c >>> +++ b/tests/unit/test-uuid.c >>> @@ -171,6 +171,50 @@ static void test_uuid_unparse_strdup(void) >>> } >>> } >>> >>> +static void test_uuid_hash(void) >>> +{ >>> + QemuUUID uuid; >>> + int i; >>> + >>> + for (i = 0; i < 100; i++) { >>> + qemu_uuid_generate(&uuid); >>> + /* Obtain the UUID hash */ >>> + uint32_t hash_a = qemu_uuid_hash(&uuid); >>> + int data_idx = g_random_int_range(0, 15); >>> + /* Change a single random byte of the UUID */ >>> + if (uuid.data[data_idx] < 0xFF) { >>> + uuid.data[data_idx]++; >>> + } else { >>> + uuid.data[data_idx]--; >>> + } >>> + /* Obtain the UUID hash again */ >>> + uint32_t hash_b = qemu_uuid_hash(&uuid); >>> + /* >>> + * Both hashes shall be different (avoid collision) >>> + * for any change in the UUID fields >>> + */ >>> + g_assert_cmpint(hash_a, !=, hash_b); >>> + } >>> +} >>> + >>> +static void test_uuid_equal(void) >>> +{ >>> + QemuUUID uuid_a, uuid_b, uuid_c; >>> + int i; >>> + >>> + for (i = 0; i < 100; i++) { >>> + qemu_uuid_generate(&uuid_a); >>> + qemu_uuid_generate(&uuid_b); >>> + memcpy(&uuid_c, &uuid_a, sizeof(uuid_a)); >>> + >>> + g_assert(qemu_uuid_equal(&uuid_a, &uuid_a)); >>> + g_assert(qemu_uuid_equal(&uuid_b, &uuid_b)); >>> + g_assert(qemu_uuid_equal(&uuid_a, &uuid_c)); >>> + g_assert_false(qemu_uuid_equal(&uuid_a, &uuid_b)); >>> + g_assert_false(qemu_uuid_equal(NULL, NULL)); >>> + } >>> +} >>> + >>> int main(int argc, char **argv) >>> { >>> g_test_init(&argc, &argv, NULL); >>> @@ -179,6 +223,8 @@ int main(int argc, char **argv) >>> g_test_add_func("/uuid/parse", test_uuid_parse); >>> g_test_add_func("/uuid/unparse", test_uuid_unparse); >>> g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup); >>> + g_test_add_func("/uuid/hash", test_uuid_hash); >>> + g_test_add_func("/uuid/equal", test_uuid_equal); >>> >>> return g_test_run(); >>> } >>> diff --git a/util/uuid.c b/util/uuid.c >>> index b1108dde78..efa9b0a0e4 100644 >>> --- a/util/uuid.c >>> +++ b/util/uuid.c >>> @@ -16,6 +16,7 @@ >>> #include "qemu/osdep.h" >>> #include "qemu/uuid.h" >>> #include "qemu/bswap.h" >>> +#include "qemu/xxhash.h" >>> >>> void qemu_uuid_generate(QemuUUID *uuid) >>> { >>> @@ -116,3 +117,40 @@ QemuUUID qemu_uuid_bswap(QemuUUID uuid) >>> bswap16s(&uuid.fields.time_high_and_version); >>> return uuid; >>> } >>> + >>> +uint32_t qemu_uuid_hash(const void *uuid) >>> +{ >>> + QemuUUID *id = (QemuUUID *) uuid; >>> + uint64_t ab = (id->fields.time_low) | >>> + (((uint64_t) id->fields.time_mid) << 32) | >>> + (((uint64_t) id->fields.time_high_and_version) << 48); >>> + uint64_t cd = (id->fields.clock_seq_and_reserved) | >>> + (id->fields.clock_seq_low << 8); >>> + int i = 0, shift = 8; >>> + >>> + for (; i < 6; i++) { >>> + shift += 8; >>> + cd |= ((uint64_t) id->fields.node[i]) << shift; >>> + } >>> + >>> + return qemu_xxhash4(ab, cd); >>> + >>> >> >> That looks quite complex, and I have no idea if this is a good hash or >> not. >> >> Instead I would implement the traditional "djb" hash over the char[16] >> data (see g_str_hash implementation for \0-terminated implementation) >> > > ok, I'll try to do something like that. Thanks for the suggestion. > > I looked for any hash library within qemu code and xxhash was one of the > options that seemed easier to use. > > >> >> >>> } >>> + >>> +int qemu_uuid_equal(const void *lhv, const void *rhv) >>> +{ >>> + int i; >>> + QemuUUID *lid = (QemuUUID *) lhv; >>> + QemuUUID *rid = (QemuUUID *) rhv; >>> + if (lid == NULL || rid == NULL) { >>> + return 0; >>> + } >>> + if (lid == rid) { >>> + return 1; >>> + } >>> + for (i = 0; i < 16; i++) { >>> + if (lid->data[i] != rid->data[i]) { >>> + return 0; >>> + } >>> + } >>> + return 1; >>> +} >>> -- >>> 2.40.0 >>> >>> >> >> -- >> Marc-André Lureau >> >