[New module] Persistent Hash Array Mapped Tries (HAMTs)

2020-10-09 Thread Marc Nieper-Wißkirchen
Hi,

after I have contributed two comparably trivial modules to Gnulib, I
would like to contribute a less trivial module this time. It
implements a persistent version of Phil Bagwell's HAMTs, which has
been popularized by Clojure. HAMTs can be used when a persistent
(functional/pure) version of a data structure akin to hash tables is
needed. For example, the dynamic environment of a (possibly
multi-threaded) Lisp or Scheme can be modeled with persistent HAMTs.

Please take a look at the attached patch.

Thank you,

Marc
From 39a1ac1f78c8d00b1dfad4f260318a6fb5cf5584 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc=20Nieper-Wi=C3=9Fkirchen?= 
Date: Wed, 7 Oct 2020 09:53:48 +0200
Subject: [PATCH] hamt: New module.

This module provides (persistent) hash array mapped tries.

* MODULES.html.sh: Add hamt.
* lib/hamt.c, lib/hamt.h, modules/hamt, modules/hamt-tests,
tests/test-hamt.c: New files.
---
 MODULES.html.sh|   1 +
 lib/hamt.c | 812 +
 lib/hamt.h |  97 ++
 modules/hamt   |  29 ++
 modules/hamt-tests |  11 +
 tests/test-hamt.c  | 148 +
 6 files changed, 1098 insertions(+)
 create mode 100644 lib/hamt.c
 create mode 100644 lib/hamt.h
 create mode 100644 modules/hamt
 create mode 100644 modules/hamt-tests
 create mode 100644 tests/test-hamt.c

diff --git a/MODULES.html.sh b/MODULES.html.sh
index a8a629e29..b48ca2bc4 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2028,6 +2028,7 @@ func_all_modules ()
   func_module hash-pjw
   func_module hash-pjw-bare
   func_module hash
+  func_module hamt
   func_module readline
   func_module readtokens
   func_module readtokens0
diff --git a/lib/hamt.c b/lib/hamt.c
new file mode 100644
index 0..4876d5809
--- /dev/null
+++ b/lib/hamt.c
@@ -0,0 +1,812 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Written by Marc Nieper-Wißkirchen , 2020.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see .  */
+
+#include 
+#include "hamt.h"
+
+#include 
+#include 
+#include 
+#include 
+#include "count-one-bits.h"
+#include "verify.h"
+#include "xalloc.h"
+
+/* See: Phil Bagwell (2000). Ideal Hash Trees (Report). Infoscience
+   Department, École Polytechnique Fédérale de Lausanne.
+
+   http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf
+
+   We implement a persistent version of hash array mapped tires.  Each
+   updating operation returns a new hamt, which shares structure with
+   the original one.  If persistence is not needed, transient hash
+   tables are probably faster. */
+
+typedef
+#ifdef GL_HAMT_THREAD_SAFE
+_Atomic
+#endif
+size_t ref_counter;
+
+/* Hash values are of type size_t.  For each level of the trie, we use
+   5 bits (corresponding to lg2 of the width of a 32-bit word.  */
+#define MAX_DEPTH ((SIZE_WIDTH + 4) / 5)
+
+/***/
+/* Entry Types */
+/***/
+
+/* Leaf nodes are of type element.  Non-leaf nodes are either
+   subtries or, if at maximal depth, buckets.  */
+enum entry_type
+{
+  element_entry = 0,
+  subtrie_entry = 1,
+  bucket_entry = 2
+};
+
+/* Return the type an entry.  */
+static enum entry_type
+entry_type (const Hamt_entry *entry)
+{
+  return entry->ref_count & 3;
+}
+
+//
+/* Reference Counts */
+//
+
+/* Initialize the reference counter, storing its type.  */
+static void
+init_ref_counter (ref_counter *counter, enum entry_type type)
+{
+  *counter = 4 + type;
+}
+
+/* Increase the reference counter of an entry.  */
+static void
+inc_ref_counter (ref_counter *counter)
+{
+  *counter += 4;
+}
+
+/* Decrease the entry reference counter.  Return false if the entry
+   can be deleted.  */
+static bool
+dec_ref_counter (ref_counter *counter)
+{
+  *counter -= 4;
+  return *counter >= 4;
+}
+
+/**/
+/* Structures */
+/**/
+
+/* Different generations of a hamt share a function table.  */
+struct function_table
+{
+  Hamt_hasher *hasher;
+  Hamt_comparator *comparator;
+  Hamt_freer *freer;
+  ref_counter ref_count;
+};
+
+/* Different generations of a hamt share subtries.  A singleton
+   subtrie is modelled as a single element.  */
+struct subtrie
+{
+  ref_counter ref_count;
+  /* Nodes carry labels from 0 to 31.  The i-th bit in map is set if
+ the node labelled i is present.  */
+  uint32_t map;
+  Hamt_entry 

Re: [PATCH] config.sub: fix i*86-pc-os2-emx is not recognized as a valid OS

2020-10-09 Thread Eric Blake
On 10/9/20 11:06 AM, KO Myung-Hun wrote:
> OS part is os2-emx not emx.
> 
> * build-aux/config.sub [i*86-pc-os2-emx]: Set os to os2-emx.
> ---
>  build-aux/config.sub | 4 
>  1 file changed, 4 insertions(+)

Thanks, but config.sub is only mirrored, not maintained, by gnulib.
You'll first want to get this upstream at config-patc...@gnu.org

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


[PATCH] config.sub: fix i*86-pc-os2-emx is not recognized as a valid OS

2020-10-09 Thread KO Myung-Hun
OS part is os2-emx not emx.

* build-aux/config.sub [i*86-pc-os2-emx]: Set os to os2-emx.
---
 build-aux/config.sub | 4 
 1 file changed, 4 insertions(+)

diff --git a/build-aux/config.sub b/build-aux/config.sub
index 9bc49a7e9..9fb81d598 100755
--- a/build-aux/config.sub
+++ b/build-aux/config.sub
@@ -1292,6 +1292,10 @@ case $basic_os in
kernel=nto
os=`echo $basic_os | sed -e 's|nto-qnx|qnx|'`
;;
+   os2*)
+   kernel=
+   os=$basic_os
+   ;;
*-*)
# shellcheck disable=SC2162
IFS="-" read kernel os <

[PATCH] pthread_sigmask: fix implicit declaration of 'pthread_sigmask'

2020-10-09 Thread KO Myung-Hun
This fixes the following warning:

-
  CC   lib/pthread_sigmask.o
  lib/pthread_sigmask.c: In function 'rpl_pthread_sigmask':
  lib/pthread_sigmask.c:52:9: warning: implicit declaration of function 
'pthread_sigmask'; did you mean 'rpl_pthread_sigmask'? 
[-Wimplicit-function-declaration]
 52 |   ret = pthread_sigmask (how, new_mask, old_mask_ptr);
| ^~~
| rpl_pthread_sigmask
-

* lib/signal.in.h [__KLIBC__]: Include .
* lib/sys_select.in.h [__KLIBC__]: Do not include .
---
 lib/signal.in.h | 6 +++---
 lib/sys_select.in.h | 9 -
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/lib/signal.in.h b/lib/signal.in.h
index c94b053d6..70a2d4af6 100644
--- a/lib/signal.in.h
+++ b/lib/signal.in.h
@@ -55,13 +55,13 @@
 #ifndef _@GUARD_PREFIX@_SIGNAL_H
 #define _@GUARD_PREFIX@_SIGNAL_H
 
-/* Mac OS X 10.3, FreeBSD 6.4, OpenBSD 3.8, OSF/1 4.0, Solaris 2.6, Android
-   declare pthread_sigmask in , not in .
+/* Mac OS X 10.3, FreeBSD 6.4, OpenBSD 3.8, OSF/1 4.0, Solaris 2.6, Android,
+   OS/2 kLIBC declare pthread_sigmask in , not in .
But avoid namespace pollution on glibc systems.*/
 #if (@GNULIB_PTHREAD_SIGMASK@ || defined GNULIB_POSIXCHECK) \
 && ((defined __APPLE__ && defined __MACH__) \
 || defined __FreeBSD__ || defined __OpenBSD__ || defined __osf__ \
-|| defined __sun || defined __ANDROID__) \
+|| defined __sun || defined __ANDROID__ || defined __KLIBC__) \
 && ! defined __GLIBC__
 # include 
 #endif
diff --git a/lib/sys_select.in.h b/lib/sys_select.in.h
index d625d73e7..034b0f32f 100644
--- a/lib/sys_select.in.h
+++ b/lib/sys_select.in.h
@@ -103,9 +103,16 @@
 /* Get definition of 'sigset_t'.
But avoid namespace pollution on glibc systems and "unknown type
name" problems on Cygwin.
+   On OS/2 kLIBC, sigset_t is defined in , too. In addition,
+   if  is included,  ->  -> 
+   are included. Then  ->  are included by GNULIB. By the
+   way,  requires PAGE_SIZE defined in . However,
+has not been processed, yet. As a result, 'PAGE_SIZE'
+   undeclared error occurs in .
Do this after the include_next (for the sake of OpenBSD 5.0) but before
the split double-inclusion guard (for the sake of Solaris).  */
-#if !((defined __GLIBC__ || defined __CYGWIN__) && !defined __UCLIBC__)
+#if !((defined __GLIBC__ || defined __CYGWIN__ || defined __KLIBC__) \
+  && !defined __UCLIBC__)
 # include 
 #endif
 
-- 
2.22.0