Hi Luigi, hi all luatex devs,

here at Debian we got a bug report about the reproducibility of luatex
format dumps. It contains a patch to make the hyphenation exception list
sorted. (I attach the patch.)

Could you please check whether this is still relevant for the
latest release of luatex?

Thanks

Norbert

On Fri, 08 Apr 2022, Roland Clobus wrote:
> Hello maintainers of texlive-binaries,
> 
> While working on the “reproducible builds” effort [1], I have noticed that the
> live image for Cinnamon in bookworm is no longer reproducible [2].
> 
> The attached patch ensures that the output of the function 'exception_strings'
> always uses the same order of the hyphenation exceptions.
> I've written the solution in C, perhaps someone more versed in lua could
> rewrite it more elegantly.
> (The lua manual says for the 'next' function: 'The order in which the indices
> are enumerated is not specified' [3])
> 
> With the attached patch applied, I'm able (with the help of
> FORCE_SOURCE_DATE=1 and SOURCE_DATE_EPOCH) to reproducibly rebuild the .fmt
> files, as created by 'fmtutil --sys --all'.
> 
> Small test case to reproduce:
> export FORCE_SOURCE_DATE=1
> export SOURCE_DATE_EPOCH=$(date +%s)
> for i in `seq 1 10`; do luahbtex -ini -jobname=luahbtex -progname=luabhtex
> luatex.ini > /dev/null; md5sum luahbtex.*; done
> 
> With kind regards,
> Roland Clobus
> 
>  [1]: https://wiki.debian.org/ReproducibleBuilds
>  [2]:
> https://jenkins.debian.net/view/live/job/reproducible_debian_live_build_cinnamon_bookworm/
>  [3]: http://www.lua.org/manual/5.4/manual.html#pdf-next
> 

--
PREINING Norbert                              https://www.preining.info
Mercari Inc.     +     IFMGA Guide     +     TU Wien     +     TeX Live
GPG: 0x860CDC13   fp: F7D8 A928 26E3 16A1 9FA0 ACF0 6CAC A448 860C DC13
diff --git a/texk/web2c/luatexdir/lang/texlang.c b/texk/web2c/luatexdir/lang/texlang.c
index ba7614ff..ccc0ec90 100644
--- a/texk/web2c/luatexdir/lang/texlang.c
+++ b/texk/web2c/luatexdir/lang/texlang.c
@@ -498,10 +498,20 @@ static char *hyphenation_exception(int exceptions, char *w)
     return ret;
 }
 
+char *unsorted_buffer = NULL;
+size_t *indexes = NULL;
+
+static int sort_func(const void *a, const void *b) {
+    size_t ia = *(size_t*)a;
+    size_t ib = *(size_t*)b;
+    return strcmp(&unsorted_buffer[ia], &unsorted_buffer[ib]);
+}
+
 char *exception_strings(struct tex_language *lang)
 {
     const char *value;
     size_t size = 0, current = 0;
+    size_t num_bytes = 0;
     size_t l = 0;
     char *ret = NULL;
     if (lang->exceptions == 0)
@@ -509,19 +519,42 @@ char *exception_strings(struct tex_language *lang)
     lua_checkstack(Luas, 2);
     lua_rawgeti(Luas, LUA_REGISTRYINDEX, lang->exceptions);
     if (lua_istable(Luas, -1)) {
-        /*tex Iterate and join. */
+        /*tex Determine required memory. */
         lua_pushnil(Luas);
         while (lua_next(Luas, -2) != 0) {
             value = lua_tolstring(Luas, -1, &l);
-            if (current + 2 + l > size) {
-                ret = xrealloc(ret, (unsigned) ((size + size / 5) + current + l + 1024));
-                size = (size + size / 5) + current + l + 1024;
-            }
-            *(ret + current) = ' ';
-            strcpy(ret + current + 1, value);
+            num_bytes += l + 1;
+            size++;
+            lua_pop(Luas, 1);
+        }
+        unsorted_buffer = xmalloc(num_bytes);
+        indexes = xmalloc(sizeof(size_t)*size);
+
+        /*tex Fetch values. */
+        current = 0;
+        size = 0;
+        lua_pushnil(Luas);
+        while (lua_next(Luas, -2) != 0) {
+            value = lua_tolstring(Luas, -1, &l);
+            strcpy(unsorted_buffer + current, value);
+            indexes[size++] = current;
             current += l + 1;
             lua_pop(Luas, 1);
         }
+        /*tex Sort and join. */
+        qsort(indexes, size, sizeof(size_t), sort_func);
+        ret = xmalloc(num_bytes);
+        current = 0;
+        for (l = 0; l < size; l++) {
+           strcpy(ret + current, &unsorted_buffer[indexes[l]]);
+           current += strlen(&unsorted_buffer[indexes[l]]);
+           ret[current] = ' ';
+           current += 1;
+        }
+        ret[current - 1] = '\0';
+
+        free(unsorted_buffer);
+        free(indexes);
     }
     return ret;
 }

Reply via email to