Hello community,

here is the log from the commit of package libxls for openSUSE:Factory checked in at 2020-09-16 19:42:53
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/libxls (Old)
and /work/SRC/openSUSE:Factory/.libxls.new.4249 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "libxls" Wed Sep 16 19:42:53 2020 rev:3 rq:834941 version:1.6.1 Changes: -------- --- /work/SRC/openSUSE:Factory/libxls/libxls.changes 2020-08-28 23:45:17.379489815 +0200 +++ /work/SRC/openSUSE:Factory/.libxls.new.4249/libxls.changes 2020-09-16 19:44:06.871104485 +0200 @@ -1,0 +2,8 @@ +Wed Sep 9 14:46:43 UTC 2020 - Jan Engelhardt <[email protected]> + +- Update to release 1.6.1 + * Enabled decoding of non-Unicode character sets in older + (BIFF5) XLS files. + * Improved string conversion performance in newer files. + +------------------------------------------------------------------- Old: ---- libxls-1.5.3.tar.gz New: ---- libxls-1.6.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ libxls.spec ++++++ --- /var/tmp/diff_new_pack.5vbgBm/_old 2020-09-16 19:44:10.267107838 +0200 +++ /var/tmp/diff_new_pack.5vbgBm/_new 2020-09-16 19:44:10.271107841 +0200 @@ -17,13 +17,13 @@ Name: libxls -%define lname libxlsreader1 -Version: 1.5.3 +%define lname libxlsreader8 +Version: 1.6.1 Release: 0 Summary: Library for Parsing Excel (XLS) Files License: BSD-2-Clause Group: Development/Libraries/C and C++ -URL: http://libxls.sourceforge.net/ +URL: https://github.com/libxls/libxls Source: https://github.com/libxls/libxls/releases/download/v%version/libxls-%version.tar.gz BuildRequires: pkg-config ++++++ libxls-1.5.3.tar.gz -> libxls-1.6.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/Makefile.am new/libxls-1.6.1/Makefile.am --- old/libxls-1.5.3/Makefile.am 2020-06-12 20:18:12.000000000 +0200 +++ new/libxls-1.6.1/Makefile.am 2020-09-09 15:59:17.000000000 +0200 @@ -10,6 +10,7 @@ include/libxls/brdb.c.h \ include/libxls/brdb.h \ include/libxls/endian.h \ + include/libxls/locale.h \ include/libxls/ole.h \ include/libxls/xlsstruct.h \ include/libxls/xlstypes.h \ @@ -39,6 +40,7 @@ libxlsreader_la_SOURCES = \ src/xlstool.c \ 
src/endian.c \ + src/locale.c \ src/ole.c \ src/xls.c diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/Makefile.in new/libxls-1.6.1/Makefile.in --- old/libxls-1.5.3/Makefile.in 2020-06-20 13:42:24.000000000 +0200 +++ new/libxls-1.6.1/Makefile.in 2020-09-09 16:08:11.000000000 +0200 @@ -150,8 +150,8 @@ libxlsreader_la_LIBADD = am__dirstamp = $(am__leading_dot)dirstamp am_libxlsreader_la_OBJECTS = src/libxlsreader_la-xlstool.lo \ - src/libxlsreader_la-endian.lo src/libxlsreader_la-ole.lo \ - src/libxlsreader_la-xls.lo + src/libxlsreader_la-endian.lo src/libxlsreader_la-locale.lo \ + src/libxlsreader_la-ole.lo src/libxlsreader_la-xls.lo libxlsreader_la_OBJECTS = $(am_libxlsreader_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -609,6 +609,7 @@ include/libxls/brdb.c.h \ include/libxls/brdb.h \ include/libxls/endian.h \ + include/libxls/locale.h \ include/libxls/ole.h \ include/libxls/xlsstruct.h \ include/libxls/xlstypes.h \ @@ -625,6 +626,7 @@ libxlsreader_la_SOURCES = \ src/xlstool.c \ src/endian.c \ + src/locale.c \ src/ole.c \ src/xls.c @@ -746,6 +748,8 @@ src/$(DEPDIR)/$(am__dirstamp) src/libxlsreader_la-endian.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libxlsreader_la-locale.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libxlsreader_la-ole.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libxlsreader_la-xls.lo: src/$(am__dirstamp) \ @@ -887,6 +891,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@cplusplus/$(DEPDIR)/main.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@fuzz/$(DEPDIR)/fuzz_xls-fuzz_xls.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libxlsreader_la-endian.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libxlsreader_la-locale.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libxlsreader_la-ole.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libxlsreader_la-xls.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libxlsreader_la-xlstool.Plo@am__quote@ @@ -932,6 +937,13 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libxlsreader_la_CFLAGS) $(CFLAGS) -c -o src/libxlsreader_la-endian.lo `test -f 'src/endian.c' || echo '$(srcdir)/'`src/endian.c +src/libxlsreader_la-locale.lo: src/locale.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libxlsreader_la_CFLAGS) $(CFLAGS) -MT src/libxlsreader_la-locale.lo -MD -MP -MF src/$(DEPDIR)/libxlsreader_la-locale.Tpo -c -o src/libxlsreader_la-locale.lo `test -f 'src/locale.c' || echo '$(srcdir)/'`src/locale.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libxlsreader_la-locale.Tpo src/$(DEPDIR)/libxlsreader_la-locale.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/locale.c' object='src/libxlsreader_la-locale.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libxlsreader_la_CFLAGS) $(CFLAGS) -c -o src/libxlsreader_la-locale.lo `test -f 'src/locale.c' || echo '$(srcdir)/'`src/locale.c + src/libxlsreader_la-ole.lo: src/ole.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libxlsreader_la_CFLAGS) $(CFLAGS) -MT 
src/libxlsreader_la-ole.lo -MD -MP -MF src/$(DEPDIR)/libxlsreader_la-ole.Tpo -c -o src/libxlsreader_la-ole.lo `test -f 'src/ole.c' || echo '$(srcdir)/'`src/ole.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libxlsreader_la-ole.Tpo src/$(DEPDIR)/libxlsreader_la-ole.Plo diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/config.h.in new/libxls-1.6.1/config.h.in --- old/libxls-1.5.3/config.h.in 2020-06-20 13:42:38.000000000 +0200 +++ new/libxls-1.6.1/config.h.in 2020-09-09 16:08:28.000000000 +0200 @@ -50,6 +50,12 @@ /* Define to 1 if you have the <wchar.h> header file. */ #undef HAVE_WCHAR_H +/* Define to 1 if you have the `wcstombs_l' function. */ +#undef HAVE_WCSTOMBS_L + +/* Define to 1 if you have the <xlocale.h> header file. */ +#undef HAVE_XLOCALE_H + /* Define as const if the declaration of iconv() needs const. */ #undef ICONV_CONST diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/configure new/libxls-1.6.1/configure --- old/libxls-1.5.3/configure 2020-06-20 13:42:25.000000000 +0200 +++ new/libxls-1.6.1/configure 2020-09-09 16:08:11.000000000 +0200 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for libxls 1.5.3. +# Generated by GNU Autoconf 2.69 for libxls 1.6.1. # # Report bugs to <[email protected]>. # @@ -590,8 +590,8 @@ # Identity of this package. PACKAGE_NAME='libxls' PACKAGE_TARNAME='libxls' -PACKAGE_VERSION='1.5.3' -PACKAGE_STRING='libxls 1.5.3' +PACKAGE_VERSION='1.6.1' +PACKAGE_STRING='libxls 1.6.1' PACKAGE_BUGREPORT='[email protected]' PACKAGE_URL='https://github.com/libxls/libxls' @@ -1346,7 +1346,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures libxls 1.5.3 to adapt to many kinds of systems. 
+\`configure' configures libxls 1.6.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1416,7 +1416,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of libxls 1.5.3:";; + short | recursive ) echo "Configuration of libxls 1.6.1:";; esac cat <<\_ACEOF @@ -1539,7 +1539,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -libxls configure 1.5.3 +libxls configure 1.6.1 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2083,7 +2083,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by libxls $as_me 1.5.3, which was +It was created by libxls $as_me 1.6.1, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2434,11 +2434,15 @@ LIBXLS_MAJOR_VERSION=1 -LIBXLS_MINOR_VERSION=5 -LIBXLS_MICRO_VERSION=3 +LIBXLS_MINOR_VERSION=6 +LIBXLS_MICRO_VERSION=1 VERSION=$LIBXLS_MAJOR_VERSION.$LIBXLS_MINOR_VERSION.$LIBXLS_MICRO_VERSION -VERSION_INFO=`expr $LIBXLS_MAJOR_VERSION + $LIBXLS_MINOR_VERSION`:$LIBXLS_MICRO_VERSION:$LIBXLS_MINOR_VERSION +# if libxls ever goes to 2.0 you will need to change the following line to +# start at whatever the ending value was in the 1.x series E.g. if the last +# minor release was 1.9 then the value of "current" was 11 so the line should +# be changed to `expr 12 + $LIBXLS_MINOR_VERSION` +VERSION_INFO=`expr 2 + $LIBXLS_MINOR_VERSION`:$LIBXLS_MICRO_VERSION @@ -2977,7 +2981,7 @@ # Define the identity of the package. 
PACKAGE='libxls' - VERSION='1.5.3' + VERSION='1.6.1' cat >>confdefs.h <<_ACEOF @@ -17815,23 +17819,25 @@ fi -for ac_func in strdup +for ac_func in strdup wcstombs_l do : - ac_fn_c_check_func "$LINENO" "strdup" "ac_cv_func_strdup" -if test "x$ac_cv_func_strdup" = xyes; then : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF -#define HAVE_STRDUP 1 +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi done -for ac_header in wchar.h +for ac_header in wchar.h xlocale.h do : - ac_fn_c_check_header_mongrel "$LINENO" "wchar.h" "ac_cv_header_wchar_h" "$ac_includes_default" -if test "x$ac_cv_header_wchar_h" = xyes; then : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF -#define HAVE_WCHAR_H 1 +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi @@ -19623,7 +19629,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by libxls $as_me 1.5.3, which was +This file was extended by libxls $as_me 1.6.1, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -19690,7 +19696,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -libxls config.status 1.5.3 +libxls config.status 1.6.1 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/configure.ac new/libxls-1.6.1/configure.ac --- old/libxls-1.5.3/configure.ac 2020-06-20 13:42:08.000000000 +0200 +++ new/libxls-1.6.1/configure.ac 2020-09-09 15:59:32.000000000 +0200 @@ -1,12 +1,16 @@ -AC_INIT([libxls],[1.5.3],[[email protected]], [libxls], [https://github.com/libxls/libxls]) +AC_INIT([libxls],[1.6.1],[[email protected]], [libxls], [https://github.com/libxls/libxls]) AC_CONFIG_SRCDIR([test/test.c]) LIBXLS_MAJOR_VERSION=1 -LIBXLS_MINOR_VERSION=5 -LIBXLS_MICRO_VERSION=3 +LIBXLS_MINOR_VERSION=6 +LIBXLS_MICRO_VERSION=1 VERSION=$LIBXLS_MAJOR_VERSION.$LIBXLS_MINOR_VERSION.$LIBXLS_MICRO_VERSION -VERSION_INFO=`expr $LIBXLS_MAJOR_VERSION + $LIBXLS_MINOR_VERSION`:$LIBXLS_MICRO_VERSION:$LIBXLS_MINOR_VERSION +# if libxls ever goes to 2.0 you will need to change the following line to +# start at whatever the ending value was in the 1.x series E.g. 
if the last +# minor release was 1.9 then the value of "current" was 11 so the line should +# be changed to `expr 12 + $LIBXLS_MINOR_VERSION` +VERSION_INFO=`expr 2 + $LIBXLS_MINOR_VERSION`:$LIBXLS_MICRO_VERSION AC_SUBST(VERSION_INFO) AC_SUBST(LIBXLS_MAJOR_VERSION) @@ -42,8 +46,8 @@ ]) AM_CONDITIONAL([HAVE_CXX11], [test x$HAVE_CXX11 = x1]) -AC_CHECK_FUNCS([strdup]) -AC_CHECK_HEADERS([wchar.h]) +AC_CHECK_FUNCS([strdup wcstombs_l]) +AC_CHECK_HEADERS([wchar.h xlocale.h]) AC_FUNC_MALLOC AC_FUNC_REALLOC AC_TYPE_SIZE_T diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/include/libxls/locale.h new/libxls-1.6.1/include/libxls/locale.h --- old/libxls-1.5.3/include/libxls/locale.h 1970-01-01 01:00:00.000000000 +0100 +++ new/libxls-1.6.1/include/libxls/locale.h 2020-09-04 17:25:39.000000000 +0200 @@ -0,0 +1,44 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2020 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#ifdef HAVE_XLOCALE_H +#include <xlocale.h> +#endif +#include <locale.h> + +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) +typedef _locale_t xls_locale_t; +#else +typedef locale_t xls_locale_t; +#endif + +xls_locale_t xls_createlocale(void); +void xls_freelocale(xls_locale_t locale); +size_t xls_wcstombs_l(char *restrict s, const wchar_t *restrict pwcs, size_t n, xls_locale_t loc); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/include/libxls/xlsstruct.h new/libxls-1.6.1/include/libxls/xlsstruct.h --- old/libxls-1.5.3/include/libxls/xlsstruct.h 2020-06-12 20:18:12.000000000 +0200 +++ new/libxls-1.6.1/include/libxls/xlsstruct.h 2020-09-04 17:25:39.000000000 +0200 @@ -500,6 +500,10 @@ char *summary; // ole file char *docSummary; // ole file + + void *converter; + void *utf16_converter; + void *utf8_locale; } xlsWorkBook; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/include/libxls/xlstool.h new/libxls-1.6.1/include/libxls/xlstool.h --- old/libxls-1.5.3/include/libxls/xlstool.h 2020-06-12 20:18:12.000000000 +0200 +++ new/libxls-1.6.1/include/libxls/xlstool.h 2020-09-04 17:25:39.000000000 +0200 @@ -37,9 +37,10 @@ void verbose(char* str); -char *utf8_decode(const char *str, DWORD len, char *encoding); -char *unicode_decode(const char *s, size_t len, size_t 
*newlen, const char* encoding); -char *get_string(const char *s, size_t len, BYTE is2, BYTE isUnicode, char *charset); +char *codepage_decode(const char *s, size_t len, xlsWorkBook *pWB); +char *unicode_decode(const char *s, size_t len, xlsWorkBook *pWB); +char *transcode_utf16_to_utf8(const char *s, size_t len); +char *get_string(const char *s, size_t len, BYTE is2, xlsWorkBook *pWB); DWORD xls_getColor(const WORD color,WORD def); void xls_showBookInfo(xlsWorkBook* pWB); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/src/locale.c new/libxls-1.6.1/src/locale.c --- old/libxls-1.5.3/src/locale.c 1970-01-01 01:00:00.000000000 +0100 +++ new/libxls-1.6.1/src/locale.c 2020-09-04 17:25:39.000000000 +0200 @@ -0,0 +1,64 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2020 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include "config.h" +#include <stdlib.h> +#include "../include/libxls/locale.h" + +xls_locale_t xls_createlocale() { +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + return _create_locale(LC_CTYPE, ".65001"); +#else + return newlocale(LC_CTYPE_MASK, "C.UTF-8", NULL); +#endif +} + +void xls_freelocale(xls_locale_t locale) { + if (!locale) + return; +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + _free_locale(locale); +#else + freelocale(locale); +#endif +} + +size_t xls_wcstombs_l(char *restrict s, const wchar_t *restrict pwcs, size_t n, xls_locale_t loc) { +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + return _wcstombs_l(s, pwcs, n, loc); +#elif defined(HAVE_WCSTOMBS_L) + return wcstombs_l(s, pwcs, n, loc); +#else + locale_t oldlocale = uselocale(loc); + size_t result = wcstombs(s, pwcs, n); + uselocale(oldlocale); + return result; +#endif +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/src/ole.c new/libxls-1.6.1/src/ole.c --- old/libxls-1.5.3/src/ole.c 2020-06-12 20:18:12.000000000 +0200 +++ new/libxls-1.6.1/src/ole.c 2020-09-04 17:25:39.000000000 +0200 @@ -493,7 +493,7 @@ total_bytes_read = -1; goto cleanup; } - name=unicode_decode(pss->name, pss->bsize, 0, "UTF-8"); + name=transcode_utf16_to_utf8(pss->name, 
pss->bsize); #ifdef OLE_DEBUG fprintf(stderr, "OLE NAME: %s count=%d\n", name, (int)ole->files.count); #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/src/xls.c new/libxls-1.6.1/src/xls.c --- old/libxls-1.5.3/src/xls.c 2020-06-12 20:18:12.000000000 +0200 +++ new/libxls-1.6.1/src/xls.c 2020-09-04 17:25:39.000000000 +0200 @@ -40,6 +40,10 @@ #include <stddef.h> #include <errno.h> +#ifdef HAVE_ICONV +#include <iconv.h> +#endif + #include <memory.h> #include <math.h> #include <sys/types.h> @@ -47,6 +51,7 @@ #include <wchar.h> #include "../include/libxls/endian.h" +#include "../include/libxls/locale.h" #include "../include/xls.h" #ifndef min @@ -222,27 +227,26 @@ if (flag & 0x1) { size_t new_len = 0; ln_toread = min((size-ofs)/2, ln); - ret=unicode_decode((char *)buf+ofs,ln_toread*2,&new_len,pWB->charset); + ret=unicode_decode((char *)buf+ofs, ln_toread*2, pWB); - if (ret == NULL) - { + if (ret == NULL) { ret = strdup("*failed to decode utf16*"); - new_len = strlen(ret); } - ret = realloc(ret,new_len+1); - ret[new_len]=0; - ln -= ln_toread; ofs+=ln_toread*2; if (xls_debug) { + new_len = strlen(ret); printf("String16SST: %s(%lu)\n", ret, (unsigned long)new_len); } } else { ln_toread = min((size-ofs), ln); - ret = utf8_decode((char *)buf+ofs, ln_toread, pWB->charset); + ret = codepage_decode((char *)buf+ofs, ln_toread, pWB); + if (ret == NULL) { + ret = strdup("*failed to decode BIFF5 string*"); + } ln -= ln_toread; ofs += ln_toread; @@ -353,7 +357,7 @@ // printf("charset=%s uni=%d\n", pWB->charset, unicode); // printf("bs name %.*s\n", bs->name[0], bs->name+1); - name = get_string(bs->name, size - offsetof(BOUNDSHEET, name), 0, pWB->is5ver, pWB->charset); + name = get_string(bs->name, size - offsetof(BOUNDSHEET, name), 0, pWB); // printf("name=%s\n", name); if(xls_debug) { @@ -643,7 +647,7 @@ tmp=&pWB->fonts.font[pWB->fonts.count]; - tmp->name = get_string(font->name, size - offsetof(FONT, name), 0, 
pWB->is5ver, pWB->charset); + tmp->name = get_string(font->name, size - offsetof(FONT, name), 0, pWB); tmp->height=font->height; tmp->flag=font->flag; @@ -671,7 +675,7 @@ tmp = &pWB->formats.format[pWB->formats.count]; tmp->index = format->index; - tmp->value = get_string(format->value, size - offsetof(FORMAT, value), (BYTE)!pWB->is5ver, (BYTE)pWB->is5ver, pWB->charset); + tmp->value = get_string(format->value, size - offsetof(FORMAT, value), (BYTE)!pWB->is5ver, pWB); if(xls_debug) xls_showFormat(tmp); pWB->formats.count++; @@ -874,7 +878,6 @@ case XLS_RECORD_BOF: // BIFF5-8 pWB->is5ver = (buf[0] + (buf[1] << 8) != 0x600); pWB->type = buf[2] + (buf[3] << 8); - if(xls_debug) { printf("version: %s\n", pWB->is5ver ? "BIFF5" : "BIFF8" ); printf(" type: %.2X\n", pWB->type); @@ -883,7 +886,7 @@ case XLS_RECORD_CODEPAGE: pWB->codepage = buf[0] + (buf[1] << 8); - if(xls_debug) printf("codepage=%x\n", pWB->codepage); + if(xls_debug) printf("codepage: %d\n", pWB->codepage); break; case XLS_RECORD_CONTINUE: @@ -1010,7 +1013,7 @@ printf(" ident: 0x%x\n", styl->ident); printf(" level: 0x%x\n", styl->lvl); } else { - char *s = get_string((char *)&buf[2], bof1.size - 2, 1, pWB->is5ver, pWB->charset); + char *s = get_string((char *)&buf[2], bof1.size - 2, 1, pWB); printf(" name=%s\n", s); free(s); } @@ -1360,7 +1363,7 @@ case XLS_RECORD_STRING: if(cell && (cell->id == XLS_RECORD_FORMULA || cell->id == XLS_RECORD_FORMULA_ALT)) { xls_cell_set_str(cell, get_string((char *)buf, tmp.size, - (BYTE)!pWB->is5ver, pWB->is5ver, pWB->charset)); + (BYTE)!pWB->is5ver, pWB)); if (xls_debug) xls_showCell(cell); } break; @@ -1460,12 +1463,7 @@ pWB->sheets.count=0; pWB->xfs.count=0; pWB->fonts.count=0; - if (charset) { - pWB->charset = malloc(strlen(charset) * sizeof(char)+1); - strcpy(pWB->charset, charset); - } else { - pWB->charset = strdup("UTF-8"); - } + pWB->charset = strdup(charset ? 
charset : "UTF-8"); retval = xls_parseWorkBook(pWB); @@ -1600,8 +1598,18 @@ if(pWB->summary) free(pWB->summary); if(pWB->docSummary) free(pWB->docSummary); - // TODO - free other dynamically allocated objects like string table?? - free(pWB); +#ifdef HAVE_ICONV + if (pWB->converter) + iconv_close((iconv_t)pWB->converter); + if (pWB->utf16_converter) + iconv_close((iconv_t)pWB->utf16_converter); +#endif + + if (pWB->utf8_locale) + xls_freelocale((xls_locale_t)pWB->utf8_locale); + + // TODO - free other dynamically allocated objects like string table?? + free(pWB); } void xls_close_WS(xlsWorkSheet* pWS) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libxls-1.5.3/src/xlstool.c new/libxls-1.6.1/src/xlstool.c --- old/libxls-1.5.3/src/xlstool.c 2020-06-12 20:18:12.000000000 +0200 +++ new/libxls-1.6.1/src/xlstool.c 2020-09-04 17:25:39.000000000 +0200 @@ -42,18 +42,9 @@ #ifdef HAVE_ICONV #include <iconv.h> - -#if defined(_AIX) || defined(__sun) -static const char *from_enc = "UTF-16le"; -#else -static const char *from_enc = "UTF-16LE"; #endif -#else -#include <locale.h> #include <limits.h> -#endif - #include <stdlib.h> #include <errno.h> #include <memory.h> @@ -65,6 +56,7 @@ #include "../include/libxls/xlstool.h" #include "../include/libxls/brdb.h" #include "../include/libxls/endian.h" +#include "../include/libxls/locale.h" extern int xls_debug; @@ -153,77 +145,70 @@ printf("libxls : %s\n",str); } -char *utf8_decode(const char *str, DWORD len, char *encoding) -{ - int utf8_chars = 0; - char *ret = NULL; - DWORD i; - - for(i=0; i<len; ++i) { - if(str[i] & (BYTE)0x80) { - ++utf8_chars; - } - } - - if(utf8_chars == 0 || strcmp(encoding, "UTF-8")) { - ret = malloc(len+1); - memcpy(ret, str, len); - ret[len] = 0; - } else { - DWORD i; - char *out; - // UTF-8 encoding inline - ret = malloc(len+utf8_chars+1); - out = ret; - for(i=0; i<len; ++i) { - BYTE c = str[i]; - if(c & (BYTE)0x80) { - *out++ = (BYTE)0xC0 | (c >> 6); - *out++ = 
(BYTE)0x80 | (c & 0x3F); - } else { - *out++ = c; - } - } - *out = 0; - } +#ifdef HAVE_ICONV - return ret; +struct codepage_entry_t { + int code; + const char *name; +}; + +static struct codepage_entry_t _codepage_entries[] = { + { .code = 874, .name = "WINDOWS-874" }, + { .code = 932, .name = "SHIFT-JIS" }, + { .code = 936, .name = "WINDOWS-936" }, + { .code = 950, .name = "BIG-5" }, + { .code = 951, .name = "BIG5-HKSCS" }, + { .code = 1250, .name = "WINDOWS-1250" }, + { .code = 1251, .name = "WINDOWS-1251" }, + { .code = 1252, .name = "WINDOWS-1252" }, + { .code = 1253, .name = "WINDOWS-1253" }, + { .code = 1254, .name = "WINDOWS-1254" }, + { .code = 1255, .name = "WINDOWS-1255" }, + { .code = 1256, .name = "WINDOWS-1256" }, + { .code = 1257, .name = "WINDOWS-1257" }, + { .code = 1258, .name = "WINDOWS-1258" }, + { .code = 10000, .name = "MACROMAN" }, + { .code = 10004, .name = "MACARABIC" }, + { .code = 10005, .name = "MACHEBREW" }, + { .code = 10006, .name = "MACGREEK" }, + { .code = 10007, .name = "MACCYRILLIC" }, + { .code = 10010, .name = "MACROMANIA" }, + { .code = 10017, .name = "MACUKRAINE" }, + { .code = 10021, .name = "MACTHAI" }, + { .code = 10029, .name = "MACCENTRALEUROPE" }, + { .code = 10079, .name = "MACICELAND" }, + { .code = 10081, .name = "MACTURKISH" }, + { .code = 10082, .name = "MACCROATIAN" }, +}; + +static int codepage_compare(const void *key, const void *value) { + const struct codepage_entry_t *cp1 = key; + const struct codepage_entry_t *cp2 = value; + return cp1->code - cp2->code; } -#ifdef HAVE_ICONV -static char* unicode_decode_iconv(const char *s, size_t len, size_t *newlen, const char* to_enc) { +static const char *encoding_for_codepage(WORD codepage) { + struct codepage_entry_t key = { .code = codepage }; + struct codepage_entry_t *result = bsearch(&key, _codepage_entries, + sizeof(_codepage_entries)/sizeof(_codepage_entries[0]), + sizeof(_codepage_entries[0]), &codepage_compare); + if (result) { + return result->name; + } + return 
"WINDOWS-1252"; +} + +static char* unicode_decode_iconv(const char *s, size_t len, iconv_t ic) { char* outbuf = 0; - if(s && len && from_enc && to_enc) + if(s && len && ic) { size_t outlenleft = len; int outlen = len; size_t inlenleft = len; - iconv_t ic = iconv_open(to_enc, from_enc); const char* src_ptr = s; char* out_ptr = 0; - if(ic == (iconv_t)-1) - { - // Something went wrong. - if (errno == EINVAL) - { - if (!strcmp(to_enc, "ASCII")) - { - ic = iconv_open("UTF-8", from_enc); - if(ic == (iconv_t)-1) - { - printf("conversion from '%s' to '%s' not available", from_enc, to_enc); - return outbuf; - } - } - } - else - { - printf ("iconv_open: error=%d", errno); - return outbuf; - } - } size_t st; outbuf = malloc(outlen + 1); @@ -255,13 +240,8 @@ } } } - iconv_close(ic); outlen -= outlenleft; - if (newlen) - { - *newlen = outbuf ? outlen : 0; - } if(outbuf) { outbuf[outlen] = 0; @@ -270,18 +250,15 @@ return outbuf; } -#else +#endif -static char *unicode_decode_wcstombs(const char *s, size_t len, size_t *newlen) { +// Convert UTF-16 to UTF-8 without iconv +static char *unicode_decode_wcstombs(const char *s, size_t len, xls_locale_t locale) { // Do wcstombs conversion char *converted = NULL; int count, count2; size_t i; - wchar_t *w; - if (setlocale(LC_CTYPE, "") == NULL) { - printf("setlocale failed: %d\n", errno); - return NULL; - } + wchar_t *w = NULL; w = malloc((len/2+1)*sizeof(wchar_t)); @@ -291,39 +268,119 @@ } w[len/2] = '\0'; - count = wcstombs(NULL, w, INT_MAX); + count = xls_wcstombs_l(NULL, w, INT_MAX, locale); if (count <= 0) { - if (newlen) *newlen = 0; - free(w); - return NULL; + goto cleanup; } converted = calloc(count+1, sizeof(char)); - count2 = wcstombs(converted, w, count); - free(w); + count2 = xls_wcstombs_l(converted, w, count, locale); if (count2 <= 0) { printf("wcstombs failed (%lu)\n", (unsigned long)len/2); - if (newlen) *newlen = 0; - return converted; + goto cleanup; } - if (newlen) *newlen = count2; + +cleanup: + free(w); return 
converted; } + +// Converts Latin-1 to UTF-8 the old-fashioned way +static char *transcode_latin1_to_utf8(const char *str, DWORD len) +{ + int utf8_chars = 0; + char *ret = NULL; + DWORD i; + + for(i=0; i<len; ++i) { + if(str[i] & (BYTE)0x80) { + ++utf8_chars; + } + } + + char *out = ret = malloc(len+utf8_chars+1); + // UTF-8 encoding inline + for(i=0; i<len; ++i) { + BYTE c = str[i]; + if(c & (BYTE)0x80) { + *out++ = (BYTE)0xC0 | (c >> 6); + *out++ = (BYTE)0x80 | (c & 0x3F); + } else { + *out++ = c; + } + } + *out = 0; + + return ret; +} + +// Convert BIFF5 string or compressed BIFF8 string to the encoding desired +// by the workbook. Returns a NUL-terminated string +char* codepage_decode(const char *s, size_t len, xlsWorkBook *pWB) { + if (!pWB->is5ver && strcmp(pWB->charset, "UTF-8") == 0) + return transcode_latin1_to_utf8(s, len); + +#ifdef HAVE_ICONV + if (!pWB->converter) { + const char *from_encoding = pWB->is5ver ? encoding_for_codepage(pWB->codepage) : "ISO-8859-1"; + iconv_t converter = iconv_open(pWB->charset, from_encoding); + if (converter == (iconv_t)-1) { + printf("conversion from '%s' to '%s' not available", from_encoding, pWB->charset); + return NULL; + } + pWB->converter = (void *)converter; + } + return unicode_decode_iconv(s, len, pWB->converter); +#else + char *ret = malloc(len+1); + memcpy(ret, s, len); + ret[len] = 0; + return ret; #endif +} -// Convert unicode string to to_enc encoding -char* unicode_decode(const char *s, size_t len, size_t *newlen, const char* to_enc) +// Convert unicode string to UTF-8 +char* transcode_utf16_to_utf8(const char *s, size_t len) { + xls_locale_t locale = xls_createlocale(); + char *result = unicode_decode_wcstombs(s, len, locale); + xls_freelocale(locale); + return result; +} + +// Convert unicode string to the encoding desired by the workbook +char* unicode_decode(const char *s, size_t len, xlsWorkBook *pWB) { #ifdef HAVE_ICONV - return unicode_decode_iconv(s, len, newlen, to_enc); +#if defined(_AIX) || 
defined(__sun) + const char *from_enc = "UTF-16le"; #else - return unicode_decode_wcstombs(s, len, newlen); + const char *from_enc = "UTF-16LE"; +#endif + if (!pWB->utf16_converter) { + iconv_t converter = iconv_open(pWB->charset, from_enc); + if (converter == (iconv_t)-1) { + printf("conversion from '%s' to '%s' not available\n", from_enc, pWB->charset); + return NULL; + } + pWB->utf16_converter = (void *)converter; + } + return unicode_decode_iconv(s, len, pWB->utf16_converter); +#else + if (!pWB->utf8_locale) { + xls_locale_t locale = xls_createlocale(); + if (locale == NULL) { + printf("creation of UTF-8 locale failed\n"); + return NULL; + } + pWB->utf8_locale = (void *)locale; + } + return unicode_decode_wcstombs(s, len, pWB->utf8_locale); #endif } // Read and decode string -char *get_string(const char *s, size_t len, BYTE is2, BYTE is5ver, char *charset) +char *get_string(const char *s, size_t len, BYTE is2, xlsWorkBook* pWB) { WORD ln; DWORD ofs = 0; @@ -347,7 +404,7 @@ ofs++; } - if(!is5ver) { + if(!pWB->is5ver) { // unicode strings have a format byte before the string if (ofs + 1 > len) { return NULL; @@ -369,12 +426,12 @@ if (ofs + 2*ln > len) { return NULL; } - ret = unicode_decode(str+ofs, ln*2, NULL, charset); + ret = unicode_decode(str+ofs, ln*2, pWB); } else { if (ofs + ln > len) { return NULL; } - ret = utf8_decode(str+ofs, ln, charset); + ret = codepage_decode(str+ofs, ln, pWB); } #if 0 // debugging @@ -612,17 +669,10 @@ case XLS_RECORD_RSTRING: len = label[0] + (label[1] << 8); label += 2; - if(pWB->is5ver) { - ret = malloc(len+1); - memcpy(ret, label, len); - ret[len] = 0; - //printf("Found BIFF5 string of len=%d \"%s\"\n", len, ret); - } else { - if ((*(label++) & 0x01) == 0) { - ret = utf8_decode((char *)label, len, pWB->charset); - } else { - ret = unicode_decode((char *)label, len*2, NULL, pWB->charset); - } + if (pWB->is5ver || (*(label++) & 0x01) == 0) { + ret = codepage_decode((char *)label, len, pWB); + } else { + ret = 
unicode_decode((char *)label, len*2, pWB); } break; case XLS_RECORD_RK:
