commit:     253ca90f3f968a03ea6fff8f0011cf411764b22e
Author:     Sam James <sam <AT> gentoo <DOT> org>
AuthorDate: Tue Aug 16 02:28:55 2022 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Tue Aug 16 02:29:57 2022 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=253ca90f

sys-apps/file: backport unicode handling fixes to 5.42

Temporarily unkeyworded given I had a few issues before I threw
in a few extra patches. Want to give it a test run for a day
or so myself first before keywording.

Bug: https://bugs.gentoo.org/861089
Signed-off-by: Sam James <sam <AT> gentoo.org>

 sys-apps/file/file-5.42-r1.ebuild                 | 162 +++++++++
 sys-apps/file/files/file-5.42-unicode-fixes.patch | 414 ++++++++++++++++++++++
 2 files changed, 576 insertions(+)

diff --git a/sys-apps/file/file-5.42-r1.ebuild b/sys-apps/file/file-5.42-r1.ebuild
new file mode 100644
index 000000000000..e74d71b49e84
--- /dev/null
+++ b/sys-apps/file/file-5.42-r1.ebuild
@@ -0,0 +1,162 @@
+# Copyright 1999-2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+DISTUTILS_OPTIONAL=1
+PYTHON_COMPAT=( python3_{8..11} )
+
+inherit distutils-r1 libtool toolchain-funcs multilib-minimal
+
+if [[ ${PV} == 9999 ]] ; then
+       EGIT_REPO_URI="https://github.com/glensc/file.git"
+       inherit autotools git-r3
+else
+       VERIFY_SIG_OPENPGP_KEY_PATH="${BROOT}"/usr/share/openpgp-keys/file.asc
+       inherit verify-sig
+       SRC_URI="ftp://ftp.astron.com/pub/file/${P}.tar.gz"
+       SRC_URI+=" verify-sig? ( ftp://ftp.astron.com/pub/file/${P}.tar.gz.asc )"
+
+       #KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~loong ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~x64-cygwin ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos ~sparc-solaris ~sparc64-solaris ~x64-solaris ~x86-solaris"
+
+       BDEPEND="verify-sig? ( sec-keys/openpgp-keys-file )"
+fi
+
+DESCRIPTION="Identify a file's format by scanning binary data for patterns"
+HOMEPAGE="https://www.darwinsys.com/file/"
+
+LICENSE="BSD-2"
+SLOT="0"
+IUSE="bzip2 lzma python seccomp static-libs zlib"
+REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )"
+
+DEPEND="
+       bzip2? ( app-arch/bzip2[${MULTILIB_USEDEP}] )
+       lzma? ( app-arch/xz-utils[${MULTILIB_USEDEP}] )
+       python? (
+               ${PYTHON_DEPS}
+               dev-python/setuptools[${PYTHON_USEDEP}]
+       )
+       zlib? ( >=sys-libs/zlib-1.2.8-r1[${MULTILIB_USEDEP}] )"
+RDEPEND="${DEPEND}
+       python? ( !dev-python/python-magic )
+       seccomp? ( sys-libs/libseccomp[${MULTILIB_USEDEP}] )"
+BDEPEND+="
+       python? (
+               ${PYTHON_DEPS}
+               ${DISTUTILS_DEPS}
+       )"
+
+PATCHES=(
+       "${FILESDIR}/file-5.39-portage-sandbox.patch" #713710 #728978
+       "${FILESDIR}/file-5.40-seccomp-fstatat64-musl.patch" #789336, not upstream yet
+       "${FILESDIR}/${P}-unicode-fixes.patch" #861089
+)
+
+src_prepare() {
+       default
+
+       if [[ ${PV} == 9999 ]] ; then
+               eautoreconf
+       else
+               elibtoolize
+       fi
+
+       # don't let python README kill main README, bug #60043
+       mv python/README.md python/README.python.md || die
+       # bug #662090
+       sed 's@README.md@README.python.md@' -i python/setup.py || die
+}
+
+multilib_src_configure() {
+       local myeconfargs=(
+               --enable-fsect-man5
+               $(use_enable bzip2 bzlib)
+               $(use_enable lzma xzlib)
+               $(use_enable seccomp libseccomp)
+               $(use_enable static-libs static)
+               $(use_enable zlib)
+       )
+       econf "${myeconfargs[@]}"
+}
+
+build_src_configure() {
+       local myeconfargs=(
+               --disable-shared
+               --disable-libseccomp
+               --disable-bzlib
+               --disable-xzlib
+               --disable-zlib
+       )
+
+       econf_build "${myeconfargs[@]}"
+}
+
+need_build_file() {
+       # when cross-compiling, we need to build up our own file
+       # because people often don't keep matching host/target
+       # file versions, bug #362941
+       tc-is-cross-compiler && ! has_version -b "~${CATEGORY}/${P}"
+}
+
+src_configure() {
+       local ECONF_SOURCE="${S}"
+
+       if need_build_file ; then
+               mkdir -p "${WORKDIR}"/build || die
+               cd "${WORKDIR}"/build || die
+               build_src_configure
+       fi
+
+       multilib-minimal_src_configure
+}
+
+multilib_src_compile() {
+       if multilib_is_native_abi ; then
+               emake
+       else
+               # bug #586444
+               emake -C src magic.h
+               emake -C src libmagic.la
+       fi
+}
+
+src_compile() {
+       if need_build_file ; then
+               # bug #586444
+               emake -C "${WORKDIR}"/build/src magic.h
+               emake -C "${WORKDIR}"/build/src file
+               local -x PATH="${WORKDIR}/build/src:${PATH}"
+       fi
+
+       multilib-minimal_src_compile
+
+       if use python ; then
+               cd python || die
+               distutils-r1_src_compile
+       fi
+}
+
+multilib_src_install() {
+       if multilib_is_native_abi ; then
+               default
+       else
+               emake -C src install-{nodist_includeHEADERS,libLTLIBRARIES} DESTDIR="${D}"
+       fi
+}
+
+multilib_src_install_all() {
+       dodoc ChangeLog MAINT # README
+
+       # Required for `file -C`
+       insinto /usr/share/misc/magic
+       doins -r magic/Magdir/*
+
+       if use python ; then
+               cd python || die
+               distutils-r1_src_install
+       fi
+
+       find "${ED}" -type f -name "*.la" -delete || die
+}

diff --git a/sys-apps/file/files/file-5.42-unicode-fixes.patch b/sys-apps/file/files/file-5.42-unicode-fixes.patch
new file mode 100644
index 000000000000..91c46a358120
--- /dev/null
+++ b/sys-apps/file/files/file-5.42-unicode-fixes.patch
@@ -0,0 +1,414 @@
+https://bugs.gentoo.org/861089
+https://github.com/file/file/commit/19bf47777d0002ee884467e45e6ace702e40a4c1
+https://github.com/file/file/commit/c80065fe6900be5e794941e29b32440e9969b1c3
+https://github.com/file/file/commit/7e59d34206d7c962e093d4239e5367a2cd8b7623
+https://github.com/file/file/commit/f042050f59bfc037677871c4d1037c33273f5213
+https://github.com/file/file/commit/d471022b2772071877895759f209f2c346757a4c
+https://github.com/file/file/commit/441ac2b15508909e82ad467960df4ac0adf9644c
+
+From 19bf47777d0002ee884467e45e6ace702e40a4c1 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <chris...@zoulas.com>
+Date: Mon, 4 Jul 2022 17:00:51 +0000
+Subject: [PATCH] PR/358: Fix width for -f - (jpalus)
+
+---
+ src/file.c | 46 +++++++++++++++++++++++++++++-----------------
+ 2 files changed, 31 insertions(+), 18 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index 5300e5af8..bb058ce1e 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -506,35 +506,47 @@ unwrap(struct magic_set *ms, const char *fn)
+       size_t llen = 0;
+       int wid = 0, cwid;
+       int e = 0;
++      size_t fi = 0, fimax = 100;
++      char **flist = malloc(sizeof(*flist) * fimax);
+ 
+-      if (strcmp("-", fn) == 0) {
++      if (flist == NULL)
++out:          file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
++
++      if (strcmp("-", fn) == 0)
+               f = stdin;
+-              wid = 1;
+-      } else {
++      else {
+               if ((f = fopen(fn, "r")) == NULL) {
+                       file_warn("Cannot open `%s'", fn);
+                       return 1;
+               }
+-
+-              while ((len = getline(&line, &llen, f)) > 0) {
+-                      if (line[len - 1] == '\n')
+-                              line[len - 1] = '\0';
+-                      cwid = file_mbswidth(ms, line);
+-                      if (cwid > wid)
+-                              wid = cwid;
+-              }
+-
+-              rewind(f);
+       }
+ 
+       while ((len = getline(&line, &llen, f)) > 0) {
+               if (line[len - 1] == '\n')
+                       line[len - 1] = '\0';
+-              e |= process(ms, line, wid);
++              if (fi >= fimax) {
++                      fimax += 100;
++                      char **nf = realloc(flist, fimax * sizeof(*flist));
++                      if (nf == NULL)
++                              goto out;
++              }
++              flist[fi++] = line;
++              cwid = file_mbswidth(ms, line);
++              if (cwid > wid)
++                      wid = cwid;
++              line = NULL;
++              llen = 0;
++      }
++
++      fimax = fi;
++      for (fi = 0; fi < fimax; fi++) {
++              e |= process(ms, flist[fi], wid);
++              free(flist[fi]);
+       }
++      free(flist);
+ 
+-      free(line);
+-      (void)fclose(f);
++      if (f != stdin)
++              (void)fclose(f);
+       return e;
+ }
+ 
+
+From c80065fe6900be5e794941e29b32440e9969b1c3 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <chris...@zoulas.com>
+Date: Mon, 4 Jul 2022 19:44:35 +0000
+Subject: [PATCH] PR/362: ro-ee: fix wide char printing
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -60,6 +60,12 @@ FILE_RCSID("@(#)$File: file.c,v 1.196 2022/07/04 17:00:51 christos Exp $")
+ #ifdef HAVE_WCTYPE_H
+ #include <wctype.h>
+ #endif
++#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \
++   defined(HAVE_WCTYPE_H)
++#define FILE_WIDE_SUPPORT
++#else
++#include <ctype.h>
++#endif
+ 
+ #if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION)
+ # include <getopt.h>
+@@ -550,6 +556,55 @@ out:              file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
+       return e;
+ }
+ 
++private void
++file_octal(unsigned char c)
++{
++      putc('\\', stdout);
++      putc(((c >> 6) & 7) + '0', stdout);
++      putc(((c >> 3) & 7) + '0', stdout);
++      putc(((c >> 0) & 7) + '0', stdout);
++}
++
++private void
++fname_print(const char *inname)
++{
++      size_t n = strlen(inname);
++#ifdef FILE_WIDE_SUPPORT
++      mbstate_t state;
++      wchar_t nextchar;
++      size_t bytesconsumed;
++
++
++      (void)mbrlen(NULL, 0, &state);
++      while (n > 0) {
++              bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
++              if (bytesconsumed == CAST(size_t, -1) ||
++                  bytesconsumed == CAST(size_t, -2))  {
++                      nextchar = *inname;
++                      bytesconsumed = 1;
++              }
++              inname += bytesconsumed;
++              n -= bytesconsumed;
++              if (iswprint(nextchar)) {
++                      putwc(nextchar, stdout);
++                      continue;
++              }
++              /* XXX: What if it is > 255? */
++              file_octal(CAST(unsigned char, nextchar));
++      }
++#else
++      size_t i;
++      for (i = 0; i < n; i++) {
++              unsigned char c = CAST(unsigned char, inname[i]);
++              if (isprint(c)) {
++                      putc(c);
++                      continue;
++              }
++              file_octal(c);
++      }
++#endif
++}
++
+ /*
+  * Called for each input file on the command line (or in a list of files)
+  */
+@@ -559,15 +614,13 @@ process(struct magic_set *ms, const char *inname, int wid)
+       const char *type, c = nulsep > 1 ? '\0' : '\n';
+       int std_in = strcmp(inname, "-") == 0;
+       int haderror = 0;
+-      size_t plen = 4 * wid + 1;
+-      char *pbuf, *pname;
+-
+-      if ((pbuf = CAST(char *, malloc(plen))) == NULL)
+-          file_err(EXIT_FAILURE, "Can't allocate %zu bytes", plen);
+ 
+       if (wid > 0 && !bflag) {
+-              pname = file_printable(ms, pbuf, plen, inname, wid);
+-              (void)printf("%s", std_in ? "/dev/stdin" : pname);
++              const char *pname = std_in ? "/dev/stdin" : inname;
++              if ((ms->flags & MAGIC_RAW) == 0)
++                      fname_print(pname);
++              else
++                      (void)printf("%s", pname);
+               if (nulsep)
+                       (void)putc('\0', stdout);
+               if (nulsep < 2) {
+@@ -586,7 +639,6 @@ process(struct magic_set *ms, const char *inname, int wid)
+       }
+       if (nobuffer)
+               haderror |= fflush(stdout) != 0;
+-      free(pbuf);
+       return haderror || type == NULL;
+ }
+ 
+@@ -594,35 +646,33 @@ protected size_t
+ file_mbswidth(struct magic_set *ms, const char *s)
+ {
+       size_t width = 0;
+-#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \
+-   defined(HAVE_WCTYPE_H)
+-      size_t bytesconsumed, old_n, n;
++#ifdef FILE_WIDE_SUPPORT
++      size_t bytesconsumed, n;
+       mbstate_t state;
+       wchar_t nextchar;
+-      (void)memset(&state, 0, sizeof(mbstate_t));
+-      old_n = n = strlen(s);
++
++      (void)mbrlen(NULL, 0, &state);
++      n = strlen(s);
+ 
+       while (n > 0) {
+               bytesconsumed = mbrtowc(&nextchar, s, n, &state);
+               if (bytesconsumed == CAST(size_t, -1) ||
+                   bytesconsumed == CAST(size_t, -2)) {
+-                      /* Something went wrong, return something reasonable */
+-                      return old_n;
++                      nextchar = *s;
++                      bytesconsumed = 1;
+               }
+               width += ((ms->flags & MAGIC_RAW) != 0
+                   || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+ 
+               s += bytesconsumed, n -= bytesconsumed;
+       }
+-      return width;
+ #else
+       while (*s) {
+               width += (ms->flags & MAGIC_RAW) != 0
+                   || isprint(CAST(unsigned char, *s)) ? 1 : 4;
+       }
+-
+-      return strlen(s);
+ #endif
++      return width;
+ }
+ 
+ private void
+--- a/src/file.h
++++ b/src/file.h
+@@ -575,7 +575,7 @@ protected size_t file_pstring_length_size(struct magic_set *,
+     const struct magic *);
+ protected size_t file_pstring_get_length(struct magic_set *,
+     const struct magic *, const char *);
+-public char * file_printable(struct magic_set *, char *, size_t,
++protected char * file_printable(struct magic_set *, char *, size_t,
+     const char *, size_t);
+ #ifdef __EMX__
+ protected int file_os2_apptype(struct magic_set *, const char *, const void *,
+--- a/src/funcs.c
++++ b/src/funcs.c
+@@ -763,7 +763,7 @@ file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb)
+ /*
+  * convert string to ascii printable format.
+  */
+-public char *
++protected char *
+ file_printable(struct magic_set *ms, char *buf, size_t bufsiz,
+     const char *str, size_t slen)
+ {
+
+From 7e59d34206d7c962e093d4239e5367a2cd8b7623 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <chris...@zoulas.com>
+Date: Mon, 4 Jul 2022 20:16:29 +0000
+Subject: [PATCH] Handle invalid characters as octal (idea from PR/363 by
+ dimich)
+
+---
+ src/file.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index 5e89137d7..af9be0f0c 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -580,8 +580,11 @@ fname_print(const char *inname)
+               bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
+               if (bytesconsumed == CAST(size_t, -1) ||
+                   bytesconsumed == CAST(size_t, -2))  {
+-                      nextchar = *inname;
+-                      bytesconsumed = 1;
++                      nextchar = *inname++;
++                      n--;
++                      (void)mbrlen(NULL, 0, &state);
++                      file_octal(CAST(unsigned char, nextchar));
++                      continue;
+               }
+               inname += bytesconsumed;
+               n -= bytesconsumed;
+@@ -660,9 +663,12 @@ file_mbswidth(struct magic_set *ms, const char *s)
+                   bytesconsumed == CAST(size_t, -2)) {
+                       nextchar = *s;
+                       bytesconsumed = 1;
++                      (void)mbrlen(NULL, 0, &state);
++                      width += 4;
++              } else {
++                      width += ((ms->flags & MAGIC_RAW) != 0
++                          || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+               }
+-              width += ((ms->flags & MAGIC_RAW) != 0
+-                  || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+ 
+               s += bytesconsumed, n -= bytesconsumed;
+       }
+
+
+From f042050f59bfc037677871c4d1037c33273f5213 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <chris...@zoulas.com>
+Date: Mon, 4 Jul 2022 22:30:51 +0000
+Subject: [PATCH] mbrlen(NULL, is not portable; revert to using memset to
+ initialize the state.
+
+---
+ src/file.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index af9be0f0c..8b4f14c2e 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -575,14 +575,14 @@ fname_print(const char *inname)
+       size_t bytesconsumed;
+ 
+ 
+-      (void)mbrlen(NULL, 0, &state);
++      (void)memset(&state, 0, sizeof(state));
+       while (n > 0) {
+               bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
+               if (bytesconsumed == CAST(size_t, -1) ||
+                   bytesconsumed == CAST(size_t, -2))  {
+                       nextchar = *inname++;
+                       n--;
+-                      (void)mbrlen(NULL, 0, &state);
++                      (void)memset(&state, 0, sizeof(state));
+                       file_octal(CAST(unsigned char, nextchar));
+                       continue;
+               }
+@@ -654,7 +654,7 @@ file_mbswidth(struct magic_set *ms, const char *s)
+       mbstate_t state;
+       wchar_t nextchar;
+ 
+-      (void)mbrlen(NULL, 0, &state);
++      (void)memset(&state, 0, sizeof(state));
+       n = strlen(s);
+ 
+       while (n > 0) {
+@@ -663,7 +663,7 @@ file_mbswidth(struct magic_set *ms, const char *s)
+                   bytesconsumed == CAST(size_t, -2)) {
+                       nextchar = *s;
+                       bytesconsumed = 1;
+-                      (void)mbrlen(NULL, 0, &state);
++                      (void)memset(&state, 0, sizeof(state));
+                       width += 4;
+               } else {
+                       width += ((ms->flags & MAGIC_RAW) != 0
+
+
+From d471022b2772071877895759f209f2c346757a4c Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <chris...@zoulas.com>
+Date: Tue, 5 Jul 2022 19:53:42 +0000
+Subject: [PATCH] Use printf("%lc") instead of putwc(). Somehow mixing wide and
+ narrow stdio does not work on Linux?
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -589,7 +589,7 @@ fname_print(const char *inname)
+               inname += bytesconsumed;
+               n -= bytesconsumed;
+               if (iswprint(nextchar)) {
+-                      putwc(nextchar, stdout);
++                      printf("%lc", nextchar);
+                       continue;
+               }
+               /* XXX: What if it is > 255? */
+
+From 441ac2b15508909e82ad467960df4ac0adf9644c Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <chris...@zoulas.com>
+Date: Tue, 5 Jul 2022 20:05:23 +0000
+Subject: [PATCH] wcwidth is not supposed to return -1 if the character is
+ printable, but it does for 0xff... Prevent it from decreasing the width.
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -666,8 +666,9 @@ file_mbswidth(struct magic_set *ms, const char *s)
+                       (void)memset(&state, 0, sizeof(state));
+                       width += 4;
+               } else {
++                      int w = wcwidth(nextchar);
+                       width += ((ms->flags & MAGIC_RAW) != 0
+-                          || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
++                          || iswprint(nextchar)) ? (w > 0 ? w : 1) : 4;
+               }
+ 
+               s += bytesconsumed, n -= bytesconsumed;
+
+From be1ac8c0aa6d21921012f62582f51a9e546e4972 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <chris...@zoulas.com>
+Date: Tue, 26 Jul 2022 15:10:05 +0000
+Subject: [PATCH] Fix bug with large flist (Florian Weimer)
+
+---
+ src/file.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/src/file.c b/src/file.c
+index e169c08fc..c0b8aa197 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -535,6 +535,7 @@ out:               file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
+                       char **nf = realloc(flist, fimax * sizeof(*flist));
+                       if (nf == NULL)
+                               goto out;
++                      flist = nf;
+               }
+               flist[fi++] = line;
+               cwid = file_mbswidth(ms, line);
+

Reply via email to