Hi Luigi, hi all luatex devs, here at Debian we got a bug report about the reproducibility of luatex format dumps. It contains a patch to make the hyphenation exception list sorted. (I attach the patch.)
Could you please check whether this is still relevant for the latest release of luatex? Thanks, Norbert On Fri, 08 Apr 2022, Roland Clobus wrote: > Hello maintainers of texlive-binaries, > > While working on the “reproducible builds” effort [1], I have noticed that the > live image for Cinnamon in bookworm is no longer reproducible [2]. > > The attached patch ensures that the output of the function 'exception_strings' > always uses the same order of the hyphenation exceptions. > I've written the solution in C, perhaps someone more versed in lua could > rewrite it more elegantly. > (The lua manual says for the 'next' function: 'The order in which the indices > are enumerated is not specified' [3]) > > With the attached patch applied, I'm able (with the help of > FORCE_SOURCE_DATE=1 > and SOURCE_DATE_EPOCH) to reproducibly rebuild the .fmt files, as created by > 'fmtutil --sys --all'. > > Small test case to reproduce: > export FORCE_SOURCE_DATE=1 > export SOURCE_DATE_EPOCH=$(date +%s) > for i in `seq 1 10`; do luahbtex -ini -jobname=luahbtex -progname=luabhtex > luatex.ini > /dev/null; md5sum luahbtex.*; done > > With kind regards, > Roland Clobus > > [1]: https://wiki.debian.org/ReproducibleBuilds > [2]: > https://jenkins.debian.net/view/live/job/reproducible_debian_live_build_cinnamon_bookworm/ > [3]: http://www.lua.org/manual/5.4/manual.html#pdf-next > -- PREINING Norbert https://www.preining.info Mercari Inc. + IFMGA Guide + TU Wien + TeX Live GPG: 0x860CDC13 fp: F7D8 A928 26E3 16A1 9FA0 ACF0 6CAC A448 860C DC13
diff --git a/texk/web2c/luatexdir/lang/texlang.c b/texk/web2c/luatexdir/lang/texlang.c index ba7614ff..ccc0ec90 100644 --- a/texk/web2c/luatexdir/lang/texlang.c +++ b/texk/web2c/luatexdir/lang/texlang.c @@ -498,10 +498,20 @@ static char *hyphenation_exception(int exceptions, char *w) return ret; } +char *unsorted_buffer = NULL; +size_t *indexes = NULL; + +static int sort_func(const void *a, const void *b) { + size_t ia = *(size_t*)a; + size_t ib = *(size_t*)b; + return strcmp(&unsorted_buffer[ia], &unsorted_buffer[ib]); +} + char *exception_strings(struct tex_language *lang) { const char *value; size_t size = 0, current = 0; + size_t num_bytes = 0; size_t l = 0; char *ret = NULL; if (lang->exceptions == 0) @@ -509,19 +519,42 @@ char *exception_strings(struct tex_language *lang) lua_checkstack(Luas, 2); lua_rawgeti(Luas, LUA_REGISTRYINDEX, lang->exceptions); if (lua_istable(Luas, -1)) { - /*tex Iterate and join. */ + /*tex Determine required memory. */ lua_pushnil(Luas); while (lua_next(Luas, -2) != 0) { value = lua_tolstring(Luas, -1, &l); - if (current + 2 + l > size) { - ret = xrealloc(ret, (unsigned) ((size + size / 5) + current + l + 1024)); - size = (size + size / 5) + current + l + 1024; - } - *(ret + current) = ' '; - strcpy(ret + current + 1, value); + num_bytes += l + 1; + size++; + lua_pop(Luas, 1); + } + unsorted_buffer = xmalloc(num_bytes); + indexes = xmalloc(sizeof(size_t)*size); + + /*tex Fetch values. */ + current = 0; + size = 0; + lua_pushnil(Luas); + while (lua_next(Luas, -2) != 0) { + value = lua_tolstring(Luas, -1, &l); + strcpy(unsorted_buffer + current, value); + indexes[size++] = current; current += l + 1; lua_pop(Luas, 1); } + /*tex Sort and join. 
*/ + qsort(indexes, size, sizeof(size_t), sort_func); + ret = xmalloc(num_bytes); + current = 0; + for (l = 0; l < size; l++) { + strcpy(ret + current, &unsorted_buffer[indexes[l]]); + current += strlen(&unsorted_buffer[indexes[l]]); + ret[current] = ' '; + current += 1; + } + ret[current - 1] = '\0'; + + free(unsorted_buffer); + free(indexes); } return ret; }