Hello community, here is the log from the commit of package hfst for openSUSE:Factory checked in at 2019-12-21 12:32:06 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/hfst (Old) and /work/SRC/openSUSE:Factory/.hfst.new.6675 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "hfst" Sat Dec 21 12:32:06 2019 rev:4 rq:758345 version:3.15.2 Changes: -------- --- /work/SRC/openSUSE:Factory/hfst/hfst.changes 2019-08-23 11:09:33.206459796 +0200 +++ /work/SRC/openSUSE:Factory/.hfst.new.6675/hfst.changes 2019-12-21 12:32:40.779400280 +0100 @@ -1,0 +2,15 @@ +Thu Dec 19 22:17:10 UTC 2019 - Jan Engelhardt <[email protected]> + +- Update to release 3.15.2 + * Fix off-by-one bug in checking captures in pmatch. + * hfst-pmatch2fst now implements eg @include"filename.txt" as a + preprocessing stage. + * hfst-pmatch2fst now implements Explode() and Implode() in + full generality. + * hfst-pmatch now allows printing weights (in location mode, + with --locate). + * Add tool hfst-eliminate-flags. + * Allow both escaped and unescaped zeros in multicharacters in + all lexc lexicon entries. + +------------------------------------------------------------------- Old: ---- hfst-3.15.0.tar.gz New: ---- hfst-3.15.2.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ hfst.spec ++++++ --- /var/tmp/diff_new_pack.UpnHWn/_old 2019-12-21 12:32:41.243400500 +0100 +++ /var/tmp/diff_new_pack.UpnHWn/_new 2019-12-21 12:32:41.247400502 +0100 @@ -1,7 +1,7 @@ # # spec file for package hfst # -# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,7 +17,7 @@ Name: hfst -Version: 3.15.0 +Version: 3.15.2 Release: 0 Summary: Helsinki Finite-State Transducer Technology License: GPL-3.0-or-later AND GPL-3.0-only AND GPL-2.0-or-later AND GPL-2.0-only AND GPL-3.0-only AND Apache-2.0 @@ -71,12 +71,12 @@ OpenFst is a library for constructing, combining, optimizing, and searching weighted finite-state transducers (FSTs). -%package -n libhfst52 +%package -n libhfst53 Summary: Helsinki Finite-State Transducer Technology Libraries License: GPL-3.0-only Group: System/Libraries -%description -n libhfst52 +%description -n libhfst53 The Helsinki Finite-State Transducer software is intended for the implementation of morphological analyzers and other tools which are based on weighted and unweighted finite-state transducer technology. @@ -97,7 +97,7 @@ Summary: Development files for the Helsinki Finite-State Transducer License: GPL-3.0-only Group: Development/Libraries/C and C++ -Requires: libhfst52 = %version +Requires: libhfst53 = %version %description devel The Helsinki Finite-State Transducer software is intended for the @@ -127,8 +127,8 @@ %postun -n libfoma-hfst0 -p /sbin/ldconfig %post -n libfst-hfst0 -p /sbin/ldconfig %postun -n libfst-hfst0 -p /sbin/ldconfig -%post -n libhfst52 -p /sbin/ldconfig -%postun -n libhfst52 -p /sbin/ldconfig +%post -n libhfst53 -p /sbin/ldconfig +%postun -n libhfst53 -p /sbin/ldconfig %post -n libsfst-hfst0 -p /sbin/ldconfig %postun -n libsfst-hfst0 -p /sbin/ldconfig @@ -145,8 +145,8 @@ %_libdir/libfst-hfst.so.0* %doc back-ends/openfst/COPYING -%files -n libhfst52 -%_libdir/libhfst.so.52* +%files -n libhfst53 +%_libdir/libhfst.so.53* %files -n libsfst-hfst0 %_libdir/libsfst-hfst.so.0* ++++++ hfst-3.15.0.tar.gz -> hfst-3.15.2.tar.gz ++++++ ++++ 4059 lines of diff (skipped) ++++ retrying with extended exclude list diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/Makefile.am new/hfst-3.15.2/Makefile.am --- old/hfst-3.15.0/Makefile.am 2017-03-08 12:58:25.000000000 +0100 +++ new/hfst-3.15.2/Makefile.am 2019-11-12 10:56:09.000000000 +0100 @@ -17,7 +17,11 @@ ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = back-ends libhfst tools test doc man scripts python +SUBDIRS = back-ends libhfst tools test doc man scripts + +if HAVE_PYTHON_BINDINGS +SUBDIRS += python +endif # for external projects to use HFST from auto* m4dir=$(datadir)/aclocal @@ -29,6 +33,7 @@ doxygen: doxygen Doxyfile endif + valgrind: $(MAKE) -C libhfst/src/ valgrind $(MAKE) -C test/tools/ valgrind diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/NEWS new/hfst-3.15.2/NEWS --- old/hfst-3.15.0/NEWS 2018-10-02 14:58:36.000000000 +0200 +++ new/hfst-3.15.2/NEWS 2019-11-12 11:08:23.000000000 +0100 @@ -5,6 +5,18 @@ This file contains all noteworthy changes in HFST development between releases. For full listing of changes see ChangeLog. +Noteworthy changes in 3.15.2 +---------------------------- + +* todo + + +Noteworthy changes in 3.15.1 +---------------------------- + +* todo + + Noteworthy changes in 3.15.0 ---------------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/config.h.in new/hfst-3.15.2/config.h.in --- old/hfst-3.15.0/config.h.in 2018-10-02 15:03:14.000000000 +0200 +++ new/hfst-3.15.2/config.h.in 2019-11-12 11:08:34.000000000 +0100 @@ -257,6 +257,9 @@ /* if using glib for unicode string handling */ #undef USE_GLIB_UNICODE +/* if using icu for unicode string handling */ +#undef USE_ICU_UNICODE + /* Define unordered container namespace */ #undef USE_TR1_UNORDERED_MAP_AND_SET diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/configure.ac new/hfst-3.15.2/configure.ac --- old/hfst-3.15.0/configure.ac 2018-10-02 14:59:07.000000000 +0200 +++ new/hfst-3.15.2/configure.ac 2019-11-12 11:01:24.000000000 +0100 @@ -19,7 +19,7 @@ HFST_NAME=hfst HFST_MAJOR=3 HFST_MINOR=15 -HFST_EXTENSION=0 +HFST_EXTENSION=2 HFST_VERSION=$HFST_MAJOR.$HFST_MINOR.$HFST_EXTENSION ### When the VERSION is INCREMENTED, REMEMBER to increment the LONGVERSION too. @@ -28,10 +28,10 @@ LIBHFST_NAME=hfst LIBHFST_MAJOR=3 LIBHFST_MINOR=15 -LIBHFST_EXTENSION=0 +LIBHFST_EXTENSION=2 LIBHFST_VERSION=$LIBHFST_MAJOR.$LIBHFST_MINOR.$LIBHFST_EXTENSION -AC_INIT([hfst], [3.15.0], [[email protected]], [hfst]) +AC_INIT([hfst], [3.15.2], [[email protected]], [hfst]) AC_CONFIG_AUX_DIR([build-aux]) AM_INIT_AUTOMAKE([-Wall std-options foreign check-news]) @@ -44,8 +44,8 @@ AC_SUBST([LIBHFST_MAJOR], [3]) AC_SUBST([LIBHFST_MINOR], [15]) -AC_SUBST([LIBHFST_EXTENSION], [0]) -AC_SUBST([LIBHFST_VERSION], [3.15.0]) +AC_SUBST([LIBHFST_EXTENSION], [2]) +AC_SUBST([LIBHFST_VERSION], [3.15.2]) AC_SUBST([LIBHFST_NAME], [hfst]) # long version = version vector cast in base 10000, for automatic comparisons @@ -56,7 +56,7 @@ # $LIBHFST_MINOR * 10000 + $LIBHFST_EXTENSION + "L" # NB! It turned out to be not portable, and can't be used! -AC_DEFINE([HFST_LONGVERSION], [300150000L], +AC_DEFINE([HFST_LONGVERSION], [300150002L], [Define to hfst version vector as long in base 10000]) AC_DEFINE([HFST_REVISION], ["$Revision$"], [Automatically substitute to configure.ac revision]) @@ -608,7 +608,7 @@ AC_PATH_PROG([GETOPT], [getopts], [false]) -AM_PATH_PYTHON([2.4],[],[false]) +AM_PATH_PYTHON([3.0],[],[false]) # if flex 3.X ever comes out, this breaks :) LEX_VERSION=`$LEX --version` AS_IF([test -z "`echo "$LEX_VERSION" | grep 'flex 2\.[[5-9]]\.[[3-9][4-9]]'`"], @@ -659,7 +659,10 @@ fi AC_DEFINE([USE_GLIB_UNICODE], 1, [if using glib for unicode string handling])]) AS_IF([test "x$with_unicode_handler" = "xicu"], - [AC_MSG_FAILURE([ICU not yet implemented (--with-unicode-handler=hfst to disable)])]) + [AC_CHECK_ICU([50], + [AC_DEFINE([USE_ICU_UNICODE], 1, [if using icu for unicode string handling])], + [AC_MSG_FAILURE([--with-unicode-handler=icu requested but icu>=50 not found])]) + ]) AS_IF([test "x$with_unicode_handler" = "xyes"], [AC_MSG_FAILURE([--with-unicode-handler=yes; unicode handler 'yes' not recognized])]) AS_IF([test "x$with_unicode_handler" = "xno"], @@ -742,6 +745,26 @@ AM_CONDITIONAL([WINDOWS], [test x$version_type = xwindows]) +# Python bindings +AC_ARG_ENABLE([python-bindings], + AS_HELP_STRING([--enable-python-bindings], + [build python bindings (default=disabled)]), + [enable_python_bindings=$enableval], + [enable_python_bindings=no]) +AM_CONDITIONAL([HAVE_PYTHON_BINDINGS], [test x$enable_python_bindings = xyes]) + +if test "x$enable_python_bindings" = "xyes" +then + AM_PATH_PYTHON([3.4], [], [AC_MSG_WARN([Can't generate SWIG wrapper without Python])]) + AC_CONFIG_FILES([python/Makefile python/test/Makefile]) + + AC_ARG_VAR([PYTHON_INSTALL_PARAMS], [Parameters to pass to the Python 3 module install step]) + if test "x$PYTHON_INSTALL_PARAMS" = "x" + then + PYTHON_INSTALL_PARAMS="--prefix=\$(prefix) --root=\$(DESTDIR)/" + fi +fi + # Checks for system services # config files @@ -772,8 +795,6 @@ back-ends/foma/Makefile back-ends/sfst/Makefile man/Makefile - python/Makefile - python/test/Makefile scripts/Makefile]) AC_CONFIG_FILES([scripts/hfst-foma-wrapper.sh], [chmod +x scripts/hfst-foma-wrapper.sh]) AC_CONFIG_FILES([scripts/hfst-foma], [chmod +x scripts/hfst-foma]) @@ -871,6 +892,5 @@ [AC_MSG_WARN([hfst-calculate is not enabled; you will not be able to compile SFST-PL scripts; enable using --enable-calculate])]) AS_IF([test "x$enable_xfst" == "xno"], [AC_MSG_WARN([hfst-xfst is not enabled; you will not be able to compile XFST scripts; enable using --enable-xfst])]) -AC_MSG_WARN([Python bindings for HFST are not under autotools; see python/README for instructions about how to build and install them]) AS_IF([test "$automake_version" \< "1.12"], [AC_MSG_WARN([automake version < 1.12; building parsers will fail unless you are building with pre-yacc-generated *.cc files and do not modify the *.yy source files])]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/libhfst/hfst.pc.in new/hfst-3.15.2/libhfst/hfst.pc.in --- old/hfst-3.15.0/libhfst/hfst.pc.in 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/libhfst/hfst.pc.in 2019-04-04 17:33:27.000000000 +0200 @@ -7,4 +7,4 @@ Description: Finite-state transducer library bridge for multiple FLOSS packages Version: @LIBHFST_VERSION@ Libs: -L${libdir} -l@LIBHFST_NAME@ -Cflags: -I${includedir}/hfst/ @GLIB_CFLAGS@ +Cflags: -I${includedir}/hfst/ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/libhfst/src/Makefile.am new/hfst-3.15.2/libhfst/src/Makefile.am --- old/hfst-3.15.0/libhfst/src/Makefile.am 2018-10-02 15:00:03.000000000 +0200 +++ new/hfst-3.15.2/libhfst/src/Makefile.am 2019-11-12 11:01:40.000000000 +0100 @@ -30,7 +30,7 @@ # libtool takes over libhfst_la_SOURCES = $(HFST_SRCS) libhfst_la_LIBADD = implementations/libhfstimplementations.la \ - parsers/libhfstparsers.la + parsers/libhfstparsers.la $(ICU_LIBS) if WANT_SFST libhfst_la_LIBADD += $(top_builddir)/back-ends/sfst/libsfst.la @@ -104,7 +104,7 @@ hfstinclude_HEADERS = $(HFST_HDRS) -libhfst_la_LDFLAGS = -no-undefined -version-info 52:0:0 +libhfst_la_LDFLAGS = -no-undefined -version-info 53:0:0 LIBHFST_TSTS=HfstApply HfstInputStream HfstTransducer \ HfstOutputStream HfstXeroxRules HfstRules HfstSymbolDefs \ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/libhfst/src/implementations/optimized-lookup/pmatch.cc new/hfst-3.15.2/libhfst/src/implementations/optimized-lookup/pmatch.cc --- old/hfst-3.15.0/libhfst/src/implementations/optimized-lookup/pmatch.cc 2018-10-02 12:24:23.000000000 +0200 +++ new/hfst-3.15.2/libhfst/src/implementations/optimized-lookup/pmatch.cc 2019-11-12 10:56:09.000000000 +0100 @@ -669,7 +669,7 @@ if (symbol.size() < 3) { return false; } - if (symbol == "@PMATCH_INPUT_MARK@") { // seems like is_special symbols can't be referred to in pmatch scripts + if (symbol == "@PMATCH_INPUT_MARK@" || symbol == "@PMATCH_BACKTRACK@") { // seems like is_special symbols can't be referred to in pmatch scripts return false; } if (is_insertion(symbol) || symbol == "@BOUNDARY@") { @@ -1198,7 +1198,7 @@ SymbolNumberVector::iterator begin, SymbolNumberVector::iterator end) { - if (pos + (end - begin) > input.size()) { + if (pos + (end - begin) >= input.size()) { return false; } for (size_t i = 0; begin + i != end; ++i) { @@ -1388,7 +1388,7 @@ SymbolNumber k = NO_SYMBOL_NUMBER; SymbolNumber boundary_sym = alphabet.get_special(boundary); char * single_codepoint_scratch; - char single_codepoint_scratch_orig[5]{}; + char single_codepoint_scratch_orig[5] = {}; if (boundary_sym != NO_SYMBOL_NUMBER) { input.push_back(boundary_sym); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/libhfst/src/implementations/optimized-lookup/pmatch_tokenize.cc new/hfst-3.15.2/libhfst/src/implementations/optimized-lookup/pmatch_tokenize.cc --- old/hfst-3.15.0/libhfst/src/implementations/optimized-lookup/pmatch_tokenize.cc 2018-10-02 14:49:30.000000000 +0200 +++ new/hfst-3.15.2/libhfst/src/implementations/optimized-lookup/pmatch_tokenize.cc 2019-11-12 10:56:09.000000000 +0100 @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2017 University of Helsinki +// Copyright (c) 2016-2019 University of Helsinki // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -244,7 +244,28 @@ } if (s.print_weights) { - outstream << " <" << wtag << ":" << weight << ">"; + std::ostringstream w; + w << std::fixed << std::setprecision(9) << weight; + std::string rounded = w.str(); + bool seendot = false; + bool inzeroes = true; + size_t firstzero = rounded.length(); + for(size_t i = rounded.length(); i > 0; --i) { + if(inzeroes && rounded[i-1] == '0') { + firstzero = i; // not i-1, keep one zero + } + else { + inzeroes = false; + } + if(rounded[i-1] == '.') { + seendot = true; + break; + } + } + if(seendot) { + rounded = rounded.substr(0, firstzero); + } + outstream << " <" << wtag << ":" << rounded << ">"; } if (in_beg != in_end) { std::ostringstream form; @@ -437,8 +458,8 @@ hfst::StringVector words = split_at(in_syms, &*(bt_points)); for(hfst::StringVector::const_iterator it = words.begin(); it != words.end(); ++it) { // Trim left/right spaces: - const size_t first = it->find_first_not_of(' '); - const size_t last = it->find_last_not_of(' ') + 1; + const size_t first = find_first_not_of_def(*it, ' ', 0); + const size_t last = 1 + find_last_not_of_def(*it, ' ', it->length() - 1); string form = it->substr(first, last-first); LocationVector loc = locate_fullmatch(container, form, s); if(loc.size() == 0 && s.verbose) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/libhfst/src/implementations/optimized-lookup/pmatch_tokenize.h new/hfst-3.15.2/libhfst/src/implementations/optimized-lookup/pmatch_tokenize.h --- old/hfst-3.15.0/libhfst/src/implementations/optimized-lookup/pmatch_tokenize.h 2018-04-13 13:40:19.000000000 +0200 +++ new/hfst-3.15.2/libhfst/src/implementations/optimized-lookup/pmatch_tokenize.h 2019-11-12 10:56:09.000000000 +0100 @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2017 University of Helsinki +// Copyright (c) 2016-2019 University of Helsinki // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -10,6 +10,7 @@ #define _HFST_OL_TRANSDUCER_PMATCH_TOKENIZE_H_ #include <iostream> +#include <iomanip> #include <iterator> #include "pmatch.h" @@ -54,5 +55,24 @@ } +inline std::size_t find_first_not_of_def(const std::string & str, char c, std::size_t def) { + auto ret = str.find_first_not_of(c); + if(ret == std::string::npos) { + return def; + } + else { + return ret; + } +} + +inline std::size_t find_last_not_of_def(const std::string & str, char c, std::size_t def) { + auto ret = str.find_last_not_of(c); + if(ret == std::string::npos) { + return def; + } + else { + return ret; + } +} #endif //_HFST_OL_TRANSDUCER_PMATCH_TOKENIZE_H_ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/libhfst/src/parsers/Makefile.am new/hfst-3.15.2/libhfst/src/parsers/Makefile.am --- old/hfst-3.15.0/libhfst/src/parsers/Makefile.am 2018-08-22 14:02:45.000000000 +0200 +++ new/hfst-3.15.2/libhfst/src/parsers/Makefile.am 2019-04-04 17:33:27.000000000 +0200 @@ -121,7 +121,7 @@ libhfstparsers_la_SOURCES=$(XRE_SRCS) $(PMATCH_SRCS) $(LEXC_SRCS) $(XFST_SRCS) $(SFST_SRCS) $(TWOLC_SRCS) AM_CPPFLAGS=-I${top_srcdir}/libhfst/src/parsers -I${top_srcdir}/libhfst/src \ - -Wno-deprecated + -Wno-deprecated ${GLIB_CPPFLAGS} ${ICU_CPPFLAGS} if WANT_MINGW AM_CPPFLAGS += -I${top_srcdir}/back-ends/dlfcn -DWINDOWS diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/libhfst/src/parsers/pmatch_utils.cc new/hfst-3.15.2/libhfst/src/parsers/pmatch_utils.cc --- old/hfst-3.15.0/libhfst/src/parsers/pmatch_utils.cc 2018-10-02 14:48:52.000000000 +0200 +++ new/hfst-3.15.2/libhfst/src/parsers/pmatch_utils.cc 2019-04-04 17:33:27.000000000 +0200 @@ -1232,15 +1232,21 @@ timer = clock(); } - if (inserted_names.size() > 0) { + if (inserted_names.size() > 0 || def_insed_expressions.size() > 0) { HfstTransducer dummy(format); // We keep TOP and any inserted transducers std::map<std::string, PmatchObject *>::iterator defs_it; for (defs_it = definitions.begin(); defs_it != definitions.end(); ++defs_it) { if (defs_it->first.compare("TOP") == 0 || - inserted_names.count(defs_it->first) != 0) { - HfstTransducer * tmp = defs_it->second->evaluate(); + inserted_names.count(defs_it->first) != 0 || + def_insed_expressions.count(defs_it->first) != 0) { + HfstTransducer * tmp = NULL; + if (def_insed_expressions.count(defs_it->first) != 0) { + tmp = def_insed_expressions[defs_it->first]->evaluate(); + } else { + tmp = defs_it->second->evaluate(); + } tmp->minimize(); dummy.harmonize(*tmp); // This is what it will be called in the archive @@ -1705,10 +1711,186 @@ return retval; } +#if USE_GLIB_UNICODE +std::string PmatchUtilityTransducers::string_from_g_unichar(gunichar ch) +{ + char buf[7]; + int buflen = g_unichar_to_utf8(ch, buf); + buf[buflen] = '\0'; + return std::string(buf); +} +#endif + +HfstTransducer PmatchUtilityTransducers::get_lowercase_acceptor_from_transducer(HfstTransducer & t) +{ +#if USE_GLIB_UNICODE + HfstTransducer lowercase(t.get_type()); + StringSet ss = t.get_alphabet(); + for (StringSet::const_iterator it = ss.begin(); it != ss.end(); ++it) { + if (g_utf8_strlen(it->c_str(), -1) == 1) { + gunichar this_unichar = g_utf8_get_char_validated(it->c_str(), -1); + if (g_unichar_islower(this_unichar)) { + lowercase.disjunct(HfstTransducer(*it, t.get_type())); + } + } + } +#elif USE_ICU_UNICODE + HfstTransducer lowercase(t.get_type()); + StringSet ss = t.get_alphabet(); + for (StringSet::const_iterator it = ss.begin(); it != ss.end(); ++it) { + icu::UnicodeString us(it->c_str()); + if (us.countChar32() == 1) { + if (u_islower(us.char32At(0))) { + lowercase.disjunct(HfstTransducer(*it, t.get_type())); + } + } + } +#else + HfstTransducer lowercase(*latin1_lowercase_acceptor); +#endif + return lowercase; +} + +HfstTransducer PmatchUtilityTransducers::get_uppercase_acceptor_from_transducer(HfstTransducer & t) +{ +#if USE_GLIB_UNICODE + HfstTransducer uppercase(t.get_type()); + StringSet ss = t.get_alphabet(); + for (StringSet::const_iterator it = ss.begin(); it != ss.end(); ++it) { + if (g_utf8_strlen(it->c_str(), -1) == 1) { + gunichar this_unichar = g_utf8_get_char_validated(it->c_str(), -1); + if (g_unichar_isupper(this_unichar)) { + uppercase.disjunct(HfstTransducer(*it, t.get_type())); + } + } + } +#elif USE_ICU_UNICODE + HfstTransducer uppercase(t.get_type()); + StringSet ss = t.get_alphabet(); + for (StringSet::const_iterator it = ss.begin(); it != ss.end(); ++it) { + icu::UnicodeString us(it->c_str()); + if (us.countChar32() == 1) { + if (u_isupper(us.char32At(0))) { + uppercase.disjunct(HfstTransducer(*it, t.get_type())); + } + } + } +#else + HfstTransducer uppercase(*latin1_uppercase_acceptor); +#endif + return uppercase; +} + +HfstTransducer PmatchUtilityTransducers::lowercaser_from_transducer(HfstTransducer & t) +{ +#if USE_GLIB_UNICODE + HfstTransducer lowercase(t.get_type()); + StringSet ss = t.get_alphabet(); + StringSet uppercases_seen; + for (StringSet::const_iterator it = ss.begin(); it != ss.end(); ++it) { + if (g_utf8_strlen(it->c_str(), -1) == 1) { + gunichar this_unichar = g_utf8_get_char_validated(it->c_str(), -1); + if (g_unichar_isalpha(this_unichar)) { + std::string upper = string_from_g_unichar(g_unichar_toupper(this_unichar)); + if (uppercases_seen.count(upper) != 0) { + continue; + } + uppercases_seen.insert(upper); + std::string lower = string_from_g_unichar(g_unichar_tolower(this_unichar)); + lowercase.disjunct(HfstTransducer(upper, lower, t.get_type())); + } + } + } +#elif USE_ICU_UNICODE + HfstTransducer lowercase(t.get_type()); + StringSet ss = t.get_alphabet(); + StringSet uppercases_seen; + for (StringSet::const_iterator it = ss.begin(); it != ss.end(); ++it) { + icu::UnicodeString us(it->c_str()); + if (us.countChar32() == 1) { + UChar32 this_unichar = us.char32At(0); + if (u_isalpha(this_unichar)) { + icu::UnicodeString upper_u = us; + upper_u.toUpper(); + std::string upper; + upper_u.toUTF8String(upper); + if (uppercases_seen.count(upper) != 0) { + continue; + } + uppercases_seen.insert(upper); + icu::UnicodeString lower_u = us; + lower_u.toLower(); + std::string lower; + lower_u.toUTF8String(lower); + lowercase.disjunct(HfstTransducer(upper, lower, t.get_type())); + } + } + } +#else + HfstTransducer lowercase(*lowerfy); +#endif + return lowercase; +} + +HfstTransducer PmatchUtilityTransducers::uppercaser_from_transducer(HfstTransducer & t) +{ +#if USE_GLIB_UNICODE + HfstTransducer uppercase(t.get_type()); + StringSet ss = t.get_alphabet(); + StringSet uppercases_seen; + for (StringSet::const_iterator it = ss.begin(); it != ss.end(); ++it) { + if (g_utf8_strlen(it->c_str(), -1) == 1) { + gunichar this_unichar = g_utf8_get_char_validated(it->c_str(), -1); + if (g_unichar_isalpha(this_unichar)) { + std::string upper = string_from_g_unichar(g_unichar_toupper(this_unichar)); + if (uppercases_seen.count(upper) != 0) { + continue; + } + uppercases_seen.insert(upper); + std::string lower = string_from_g_unichar(g_unichar_tolower(this_unichar)); + uppercase.disjunct(HfstTransducer(lower, upper, t.get_type())); + } + } + } +#elif USE_ICU_UNICODE + HfstTransducer uppercase(t.get_type()); + StringSet ss = t.get_alphabet(); + StringSet uppercases_seen; + for (StringSet::const_iterator it = ss.begin(); it != ss.end(); ++it) { + icu::UnicodeString us(it->c_str()); + if (us.countChar32() == 1) { + UChar32 this_unichar = us.char32At(0); + if (u_isalpha(this_unichar)) { + icu::UnicodeString upper_u = us; + upper_u.toUpper(); + std::string upper; + upper_u.toUTF8String(upper); + if (uppercases_seen.count(upper) != 0) { + continue; + } + uppercases_seen.insert(upper); + icu::UnicodeString lower_u = us; + lower_u.toLower(); + std::string lower; + lower_u.toUTF8String(lower); + uppercase.disjunct(HfstTransducer(lower, upper, t.get_type())); + } + } + } +#else + HfstTransducer uppercase(*capify); +#endif + return uppercase; +} + HfstTransducer * PmatchUtilityTransducers::cap(HfstTransducer & t, Side side, bool optional) { + bool orig_xerox_composition_value = hfst::get_xerox_composition(); + // This is to match flags in t with ?'s in "anything" + hfst::set_xerox_composition(true); + HfstTransducer * retval = NULL; - HfstTransducer cap(*capify); + HfstTransducer cap = uppercaser_from_transducer(t); HfstTransducer decap(cap); decap.invert(); HfstTransducer anything(HfstTransducer::identity_pair(t.get_type())); @@ -1717,7 +1899,7 @@ anything_but_whitespace_star.repeat_star(); if (optional == false) { // don't let lowercased first letters through - anything.subtract(*latin1_lowercase_acceptor); + anything.subtract(get_lowercase_acceptor_from_transducer(t)); } // As in the regexp // [[[["A":"a" [[\" "]* (" " "A":"a")]* ] .o. [{ab ad}:{ef eh}].u]] .o. @@ -1772,70 +1954,82 @@ retval->output_project(); } retval->minimize(); + hfst::set_xerox_composition(orig_xerox_composition_value); return retval; } HfstTransducer * PmatchUtilityTransducers::tolower(HfstTransducer & t, Side side, bool optional) { + bool orig_xerox_composition_value = hfst::get_xerox_composition(); + // This is to match flags in t with ?'s in "anything" + hfst::set_xerox_composition(true); + HfstTransducer anything(hfst::internal_identity, hfst::pmatch::format); if (optional == false) { - anything.subtract(*latin1_uppercase_acceptor); + anything.subtract(get_uppercase_acceptor_from_transducer(t)); } HfstTransducer * retval = NULL; if (side == Lower) { - HfstTransducer lowercase(*lowerfy); + HfstTransducer lowercase = lowercaser_from_transducer(t); lowercase.disjunct(anything); lowercase.repeat_star(); retval = new HfstTransducer(t); retval->compose(lowercase); } else if (side == Upper) { - retval = new HfstTransducer(*capify); + retval = new HfstTransducer(uppercaser_from_transducer(t)); retval->disjunct(anything); retval->repeat_star(); retval->compose(t); } else { // both - retval = new HfstTransducer(*capify); + retval = new HfstTransducer(uppercaser_from_transducer(t)); retval->disjunct(anything); retval->repeat_star(); retval->compose(t); - HfstTransducer lowercase(*lowerfy); + HfstTransducer lowercase = lowercaser_from_transducer(t); lowercase.disjunct(anything); lowercase.repeat_star(); retval->compose(lowercase); } retval->minimize(); + hfst::set_xerox_composition(orig_xerox_composition_value); return retval; } HfstTransducer * PmatchUtilityTransducers::toupper(HfstTransducer & t, Side side, bool optional) { + + bool orig_xerox_composition_value = hfst::get_xerox_composition(); + // This is to match flags in t with ?'s in "anything" + hfst::set_xerox_composition(true); + HfstTransducer anything(hfst::internal_identity, hfst::pmatch::format); if (optional == false) { - anything.subtract(*latin1_lowercase_acceptor); + anything.subtract(get_lowercase_acceptor_from_transducer(t)); } HfstTransducer * retval = NULL; if (side == Lower) { - HfstTransducer uppercase(*capify); + HfstTransducer uppercase = uppercaser_from_transducer(t); uppercase.disjunct(anything); uppercase.repeat_star(); retval = new HfstTransducer(t); retval->compose(uppercase); } else if (side == Upper) { - retval = new HfstTransducer(*lowerfy); + retval = new HfstTransducer(lowercaser_from_transducer(t)); retval->disjunct(anything); retval->repeat_star(); retval->compose(t); } else { // both - retval = new HfstTransducer(*lowerfy); + retval = new HfstTransducer(lowercaser_from_transducer(t)); retval->disjunct(anything); retval->repeat_star(); retval->compose(t); - HfstTransducer uppercase(*capify); + HfstTransducer uppercase = uppercaser_from_transducer(t); uppercase.disjunct(anything); uppercase.repeat_star(); retval->compose(uppercase); } retval->minimize(); + hfst::set_xerox_composition(orig_xerox_composition_value); return retval; } @@ -1899,7 +2093,11 @@ expansions_done.insert(*it); if (definitions.count(ins_name) != 0) { StringSet allowed, disallowed; - definitions[ins_name]->collect_initial_symbols_into(allowed, disallowed); + if (def_insed_expressions.count(ins_name) != 0) { + def_insed_expressions[ins_name]->collect_initial_symbols_into(allowed, disallowed); + } else { + definitions[ins_name]->collect_initial_symbols_into(allowed, disallowed); + } if (allowed.size() != 0) { expanded_symbols.insert(allowed.begin(), allowed.end()); } else { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/libhfst/src/parsers/pmatch_utils.h new/hfst-3.15.2/libhfst/src/parsers/pmatch_utils.h --- old/hfst-3.15.0/libhfst/src/parsers/pmatch_utils.h 2018-09-27 13:13:34.000000000 +0200 +++ new/hfst-3.15.2/libhfst/src/parsers/pmatch_utils.h 2019-04-04 17:33:27.000000000 +0200 @@ -27,6 +27,13 @@ #include "HfstXeroxRules.h" #include "xre_utils.h" +#if USE_GLIB_UNICODE +#include <glib.h> +#elif USE_ICU_UNICODE +#include <unicode/unistr.h> +#include <unicode/uchar.h> +#endif + void pmatchwarning(const char *msg); namespace hfst { namespace pmatch { @@ -404,6 +411,15 @@ HfstTransducer * make_capify( ImplementationType type = TROPICAL_OPENFST_TYPE); + // Unicode handling, if available + #if USE_GLIB_UNICODE + std::string string_from_g_unichar(gunichar ch); + #endif + HfstTransducer get_uppercase_acceptor_from_transducer(HfstTransducer & t); + HfstTransducer get_lowercase_acceptor_from_transducer(HfstTransducer & t); + HfstTransducer uppercaser_from_transducer(HfstTransducer & t); + HfstTransducer lowercaser_from_transducer(HfstTransducer & t); + HfstTransducer * cap(HfstTransducer & t, Side side = Both, bool optional = false); HfstTransducer * tolower(HfstTransducer & t, Side side = Both, @@ -420,7 +436,7 @@ HfstTransducer * cache; bool parent_is_context; PmatchObject(); - virtual ~PmatchObject() = default; + virtual ~PmatchObject() throw() = default; void start_timing() { if (verbose && name != "") { @@ -663,7 +679,7 @@ HfstTransducer * t; PmatchTransducerContainer(HfstTransducer * target): t(target) {} - ~PmatchTransducerContainer() { delete t; } + ~PmatchTransducerContainer() throw() { delete t; } HfstTransducer * evaluate() { if (t->get_type() != format) { t->convert(format); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/m4/ax_check_icu.m4 new/hfst-3.15.2/m4/ax_check_icu.m4 --- old/hfst-3.15.0/m4/ax_check_icu.m4 1970-01-01 01:00:00.000000000 +0100 +++ new/hfst-3.15.2/m4/ax_check_icu.m4 2019-04-04 17:33:27.000000000 +0200 @@ -0,0 +1,117 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_check_icu.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_ICU(version, action-if, action-if-not) +# +# DESCRIPTION +# +# Defines ICU_LIBS, ICU_CFLAGS, ICU_CXXFLAGS. See icu-config(1) man page. +# +# LICENSE +# +# Copyright (c) 2008 Akos Maroy <[email protected]> +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 7 + +AU_ALIAS([AC_CHECK_ICU], [AX_CHECK_ICU]) +AC_DEFUN([AX_CHECK_ICU], [ + succeeded=no + + if test -z "$ICU_CONFIG"; then + AC_PATH_PROG(ICU_CONFIG, icu-config, no) + fi + + if test -z "$PKG_CONFIG"; then + AC_PATH_PROG(PKG_CONFIG, pkg-config, no) + fi + + if test "$ICU_CONFIG" = "no" && test "$PKG_CONFIG" = "no" ; then + echo "*** Neither icu-config nor pkg-config could not be found. Make sure either is" + echo "*** in your path, and that taglib is properly installed." + echo "*** Or see http://ibm.com/software/globalization/icu/" + fi + + if test "$ICU_CONFIG" != "no" ; then + ICU_VERSION=`$ICU_CONFIG --version` + AC_MSG_CHECKING(for ICU >= $1 via icu-config) + VERSION_CHECK=`expr $ICU_VERSION \>\= $1` + if test "$VERSION_CHECK" = "1" ; then + AC_MSG_RESULT(yes) + succeeded=yes + + AC_MSG_CHECKING(ICU_CPPFLAGS) + ICU_CPPFLAGS=`$ICU_CONFIG --cppflags` + AC_MSG_RESULT($ICU_CPPFLAGS) + + AC_MSG_CHECKING(ICU_CFLAGS) + ICU_CFLAGS=`$ICU_CONFIG --cflags` + AC_MSG_RESULT($ICU_CFLAGS) + + AC_MSG_CHECKING(ICU_CXXFLAGS) + ICU_CXXFLAGS=`$ICU_CONFIG --cxxflags` + AC_MSG_RESULT($ICU_CXXFLAGS) + + AC_MSG_CHECKING(ICU_LIBS) + ICU_LIBS=`$ICU_CONFIG --ldflags` + AC_MSG_RESULT($ICU_LIBS) + else + ICU_CPPFLAGS="" + ICU_CFLAGS="" + ICU_CXXFLAGS="" + ICU_LIBS="" + ## If we have a custom action on failure, don't print errors, but + ## do set a variable so people can do so. + ifelse([$3], ,echo "can't find ICU >= $1 via icu-config",) + fi + fi + + if test "$succeeded" != "yes" && test "$PKG_CONFIG" != "no" ; then + AC_MSG_CHECKING(for ICU >= $1 via pkg-config) + if $PKG_CONFIG --atleast-version=$1 icu-i18n ; then + AC_MSG_RESULT(yes) + succeeded=yes + + AC_MSG_CHECKING(ICU_CPPFLAGS) + ICU_CPPFLAGS=`$PKG_CONFIG --variable=CPPFLAGS icu-i18n` + AC_MSG_RESULT($ICU_CPPFLAGS) + + AC_MSG_CHECKING(ICU_CFLAGS) + ICU_CFLAGS=`$PKG_CONFIG --variable=CFLAGS icu-i18n` + AC_MSG_RESULT($ICU_CFLAGS) + + AC_MSG_CHECKING(ICU_CXXFLAGS) + ICU_CXXFLAGS=`$PKG_CONFIG --variable=CXXFLAGS icu-i18n` + AC_MSG_RESULT($ICU_CXXFLAGS) + + AC_MSG_CHECKING(ICU_LIBS) + ICU_LIBS=`$PKG_CONFIG --libs icu-i18n` + AC_MSG_RESULT($ICU_LIBS) + else + ICU_CPPFLAGS="" + ICU_CFLAGS="" + ICU_CXXFLAGS="" + ICU_LIBS="" + ## If we have a custom action on failure, don't print errors, but + ## do set a variable so people can do so. + ifelse([$3], ,echo "can't find ICU >= $1 via pkg-config",) + fi + fi + + if test "$succeeded" = "yes"; then + AC_SUBST(ICU_CPPFLAGS) + AC_SUBST(ICU_CFLAGS) + AC_SUBST(ICU_CXXFLAGS) + AC_SUBST(ICU_LIBS) + ifelse([$2], , :, [$2]) + else + ifelse([$3], , AC_MSG_ERROR([Library requirements (ICU) not met.]), [$3]) + fi +]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/python/Makefile.am new/hfst-3.15.2/python/Makefile.am --- old/hfst-3.15.0/python/Makefile.am 2018-03-20 15:09:20.000000000 +0100 +++ new/hfst-3.15.2/python/Makefile.am 2019-11-12 10:56:09.000000000 +0100 @@ -7,6 +7,16 @@ # See the file COPYING included with this distribution for more # information. +SWIG_INTERFACE = libhfst.i + +BUILT_SOURCES = %_wrap.cpp.cpp + +%_wrap.cpp.cpp: $(SWIG_INTERFACE) setup.py + $(PYTHON) setup.py build + +install-exec-local: + $(PYTHON) setup.py install $(PYTHON_INSTALL_PARAMS) + SUBDIRS = test EXTRA_DIST = libhfst.i docstrings.i README setup.py \ hfst_extensions.cpp hfst_file_extensions.cpp hfst_lexc_extensions.cpp hfst_sfst_extensions.cpp \ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/python/hfst/__init__.py new/hfst-3.15.2/python/hfst/__init__.py --- old/hfst-3.15.0/python/hfst/__init__.py 2018-10-02 15:00:21.000000000 +0200 +++ new/hfst-3.15.2/python/hfst/__init__.py 2019-11-12 11:02:54.000000000 +0100 @@ -47,7 +47,7 @@ """ -__version__ = "3.15.0.0" +__version__ = "3.15.2.0" import hfst.exceptions import hfst.sfst_rules diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/python/libhfst.i new/hfst-3.15.2/python/libhfst.i --- old/hfst-3.15.0/python/libhfst.i 2018-10-03 11:45:58.000000000 +0200 +++ new/hfst-3.15.2/python/libhfst.i 2018-10-25 12:56:53.000000000 +0200 @@ -1714,7 +1714,7 @@ %extend { PmatchContainer(const std::string & filename) { - std::ifstream ifs(filename); + std::ifstream ifs(filename.c_str()); hfst_ol::PmatchContainer * retval = new hfst_ol::PmatchContainer(ifs); ifs.close(); return retval; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/python/setup.py new/hfst-3.15.2/python/setup.py --- old/hfst-3.15.0/python/setup.py 2018-10-02 15:00:52.000000000 +0200 +++ new/hfst-3.15.2/python/setup.py 2019-11-12 11:02:25.000000000 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 """ setup for HFST-swig @@ -50,7 +50,7 @@ ) setup(name = 'libhfst_swig', - version = '3.15.0_beta', + version = '3.15.2_beta', author = 'HFST team', author_email = '[email protected]', url = 'http://hfst.github.io/', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/python/test/test_att_reader.py new/hfst-3.15.2/python/test/test_att_reader.py --- old/hfst-3.15.0/python/test/test_att_reader.py 2017-06-08 11:11:37.000000000 +0200 +++ new/hfst-3.15.2/python/test/test_att_reader.py 2018-10-24 14:45:02.000000000 +0200 @@ -29,10 +29,16 @@ assert(len(transducers)) == 4 transducers = [] -with open('testfile_unicode.att', 'r') as f: - r = hfst.AttReader(f) - for tr in r: - transducers.append(tr) +if sys.version_info[0] < 3: + with open('testfile_unicode.att', 'rb') as f: + r = hfst.AttReader(f) + for tr in r: + transducers.append(tr) +else: + with open('testfile_unicode.att', 'r', encoding='utf-8') as f: + r = hfst.AttReader(f) + for tr in r: + transducers.append(tr) assert(f.closed) assert(len(transducers)) == 1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/proc-caps-in.strings new/hfst-3.15.2/test/tools/proc-caps-in.strings --- old/hfst-3.15.0/test/tools/proc-caps-in.strings 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/test/tools/proc-caps-in.strings 2019-04-04 17:33:27.000000000 +0200 @@ -4,3 +4,5 @@ TeSt tesT TEst +čest +Čest diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/proc-caps-out1.strings new/hfst-3.15.2/test/tools/proc-caps-out1.strings --- old/hfst-3.15.0/test/tools/proc-caps-out1.strings 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/test/tools/proc-caps-out1.strings 2019-04-04 17:33:27.000000000 +0200 @@ -4,3 +4,5 @@ ^TeSt/Test+np/Test+n/Test+nlp$ ^tesT/test+n$ ^TEst/TEST+np/TEST+n/TEST+nlp$ +^čest/čest+n$ +^Čest/Čest+n$ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/proc-caps-out3.strings new/hfst-3.15.2/test/tools/proc-caps-out3.strings --- old/hfst-3.15.0/test/tools/proc-caps-out3.strings 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/test/tools/proc-caps-out3.strings 2019-04-04 17:33:27.000000000 +0200 @@ -4,3 +4,5 @@ ^TeSt/*TeSt$ ^tesT/*tesT$ ^TEst/*TEst$ +^čest/čest+n$ +^Čest/*Čest$ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/proc-caps-out4.strings new/hfst-3.15.2/test/tools/proc-caps-out4.strings --- old/hfst-3.15.0/test/tools/proc-caps-out4.strings 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/test/tools/proc-caps-out4.strings 2019-04-04 17:33:27.000000000 +0200 @@ -4,3 +4,5 @@ ^TeSt/Test+np/test+n/test+nlp$ ^tesT/test+n$ ^TEst/Test+np/test+n/test+nlp$ +^čest/čest+n$ +^Čest/čest+n$ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/proc-caps-out5.strings new/hfst-3.15.2/test/tools/proc-caps-out5.strings --- old/hfst-3.15.0/test/tools/proc-caps-out5.strings 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/test/tools/proc-caps-out5.strings 2019-04-04 17:33:27.000000000 +0200 @@ -11,3 +11,7 @@ "*tesT" "<TEst>" "*TEst" +"<čest>" + "čest" n +"<Čest>" + "*Čest" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/proc-caps.txt new/hfst-3.15.2/test/tools/proc-caps.txt --- old/hfst-3.15.0/test/tools/proc-caps.txt 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/test/tools/proc-caps.txt 2019-04-04 17:33:27.000000000 +0200 @@ -2,6 +2,7 @@ 0 2 T t 0 3 T T 0 4 t t +0 4 č č 1 5 F F 2 6 e e 3 7 e e diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/tokenize-backtrack-out-giella-cg-contiguous.strings new/hfst-3.15.2/test/tools/tokenize-backtrack-out-giella-cg-contiguous.strings --- old/hfst-3.15.0/test/tools/tokenize-backtrack-out-giella-cg-contiguous.strings 2017-05-15 11:39:35.000000000 +0200 +++ new/hfst-3.15.2/test/tools/tokenize-backtrack-out-giella-cg-contiguous.strings 2019-02-15 10:51:28.000000000 +0100 @@ -1,9 +1,9 @@ "<su.>" - "." PUNCT <W:0.0000000000> "<.>" - "su" Adv Abbr <W:0.0000000000> "<su>" - "." PUNCT <W:0> "<.>" - "su" Prn <W:0> "<su>" + "." PUNCT <W:0.0> "<.>" + "su" Adv Abbr <W:0.0> "<su>" + "." PUNCT <W:0.0> "<.>" + "su" Prn <W:0.0> "<su>" : "<su>" - "su" Prn <W:0.0000000000> + "su" Prn <W:0.0> :\n diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/tokenize-backtrack-out-giella-cg-spaces.strings new/hfst-3.15.2/test/tools/tokenize-backtrack-out-giella-cg-spaces.strings --- old/hfst-3.15.0/test/tools/tokenize-backtrack-out-giella-cg-spaces.strings 2017-05-15 11:39:35.000000000 +0200 +++ new/hfst-3.15.2/test/tools/tokenize-backtrack-out-giella-cg-spaces.strings 2019-02-15 10:51:28.000000000 +0100 @@ -1,14 +1,14 @@ "<njeallje logi guokte>" - "njealljelogiguokte" Num ErrSpace <W:0.0000000000> - "guokte" Num <W:0> "<guokte>" - "logi" Num <W:0> "< logi >" - "njeallje" Num <W:0> "<njeallje >" - "guokte" Num <W:0> "<guokte>" - "gi" N <W:0> "<gi >" - "lo" Num <W:0> "< lo>" - "njeallje" Num <W:0> "<njeallje >" - "guokte" Num <W:0> "<guokte>" - "gi" N <W:0> "< logi >" - "lo" Cmp <W:0> - "njeallje" Num <W:0> "<njeallje >" + "njealljelogiguokte" Num ErrSpace <W:0.0> + "guokte" Num <W:0.0> "<guokte>" + "logi" Num <W:0.0> "< logi >" + "njeallje" Num <W:0.0> "<njeallje >" + "guokte" Num <W:0.0> "<guokte>" + "gi" N <W:0.0> "<gi >" + "lo" Num <W:0.0> "< lo>" + "njeallje" Num <W:0.0> "<njeallje >" + "guokte" Num <W:0.0> "<guokte>" + "gi" N <W:0.0> "< logi >" + "lo" Cmp <W:0.0> + "njeallje" Num <W:0.0> "<njeallje >" :\n diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/tokenize-backtrack-out-giella-cg.strings new/hfst-3.15.2/test/tools/tokenize-backtrack-out-giella-cg.strings --- old/hfst-3.15.0/test/tools/tokenize-backtrack-out-giella-cg.strings 2017-05-15 11:39:35.000000000 +0200 +++ new/hfst-3.15.2/test/tools/tokenize-backtrack-out-giella-cg.strings 2019-02-15 10:51:28.000000000 +0100 @@ -1,21 +1,21 @@ "<busse>" - "busse" N <W:0.0000000000> - "busset" V <W:0.0000000000> + "busse" N <W:0.0> + "busset" V <W:0.0> : "<skuvla>" - "skuvla" N <W:0.0000000000> + "skuvla" N <W:0.0> : "<skuvla busse>" - "skuvlabusse" N ErrSpace <W:0.0000000000> - "busset" V <W:0> "<busse>" - "skuvla" N <W:0> "<skuvla >" - "busse" N <W:0> "<busse>" - "skuvla" N <W:0> "<skuvla >" + "skuvlabusse" N ErrSpace <W:0.0> + "busset" V <W:0.0> "<busse>" + "skuvla" N <W:0.0> "<skuvla >" + "busse" N <W:0.0> "<busse>" + "skuvla" N <W:0.0> "<skuvla >" : "<Jan.>" - "Jan." N Abbr <W:0.0000000000> - "." PUNCT <W:0.0000000000> "<.>" - "Jan." N Abbr <W:0.0000000000> "<Jan>" - "." PUNCT <W:0> "<.>" - "Jan" N Prop <W:0> "<Jan>" + "Jan." N Abbr <W:0.0> + "." PUNCT <W:0.0> "<.>" + "Jan." N Abbr <W:0.0> "<Jan>" + "." PUNCT <W:0.0> "<.>" + "Jan" N Prop <W:0.0> "<Jan>" :\n diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/tokenize-dog-out-giella-cg-flushing.strings new/hfst-3.15.2/test/tools/tokenize-dog-out-giella-cg-flushing.strings --- old/hfst-3.15.0/test/tools/tokenize-dog-out-giella-cg-flushing.strings 2017-05-15 11:39:35.000000000 +0200 +++ new/hfst-3.15.2/test/tools/tokenize-dog-out-giella-cg-flushing.strings 2019-02-15 10:51:28.000000000 +0100 @@ -1,5 +1,5 @@ "<dog>" - "dog" N <W:0.0000000000> + "dog" N <W:0.0> :[\\n<\\>] "<cat>" "cat" ? diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/tokenize-dog-out-giella-cg-superblank.strings new/hfst-3.15.2/test/tools/tokenize-dog-out-giella-cg-superblank.strings --- old/hfst-3.15.0/test/tools/tokenize-dog-out-giella-cg-superblank.strings 2017-05-15 11:39:35.000000000 +0200 +++ new/hfst-3.15.2/test/tools/tokenize-dog-out-giella-cg-superblank.strings 2019-02-15 10:51:28.000000000 +0100 @@ -1,5 +1,5 @@ "<dog>" - "dog" N <W:0.0000000000> + "dog" N <W:0.0> :[\\n<\\>] "<cat>" "cat" ? diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/test/tools/tokenize-dog-out-giella-cg.strings new/hfst-3.15.2/test/tools/tokenize-dog-out-giella-cg.strings --- old/hfst-3.15.0/test/tools/tokenize-dog-out-giella-cg.strings 2017-05-15 11:39:35.000000000 +0200 +++ new/hfst-3.15.2/test/tools/tokenize-dog-out-giella-cg.strings 2019-02-15 10:51:28.000000000 +0100 @@ -2,14 +2,14 @@ "test" ? : "<dog>" - "dog" N <W:0.0000000000> + "dog" N <W:0.0> : "<be dog>" - "dog" N <W:0.0000000000> "<dog>" - "be" V <W:0.0000000000> "<be >" - "bedog" V <W:0.0000000000> + "dog" N <W:0.0> "<dog>" + "be" V <W:0.0> "<be >" + "bedog" V <W:0.0> : "<catdog>" - "dog" N <W:0.0000000000> - "cat" N Cmp <W:0.0000000000> + "dog" N <W:0.0> + "cat" N Cmp <W:0.0> : собака\n diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/tools/src/Makefile.am new/hfst-3.15.2/tools/src/Makefile.am --- old/hfst-3.15.0/tools/src/Makefile.am 2018-08-27 15:03:53.000000000 +0200 +++ new/hfst-3.15.2/tools/src/Makefile.am 2019-04-04 17:33:27.000000000 +0200 @@ -17,8 +17,8 @@ SUBDIRS=hfst-proc hfst-twolc hfst-tagger parsers AUTOMAKE_OPTIONS=std-options subdir-objects -LDADD = $(top_builddir)/libhfst/src/libhfst.la -AM_CPPFLAGS = -I${top_srcdir}/libhfst/src -I${top_srcdir}/libhfst/src/parsers -I${top_srcdir}/tools/src/parsers -Wno-sign-compare +LDADD = $(top_builddir)/libhfst/src/libhfst.la $(ICU_LIBS) +AM_CPPFLAGS = -I${top_srcdir}/libhfst/src -I${top_srcdir}/libhfst/src/parsers -I${top_srcdir}/tools/src/parsers -Wno-sign-compare ${GLIB_CPPFLAGS} ${ICU_CPPFLAGS} AM_CXXFLAGS = -Wno-deprecated -Wno-sign-compare # sort alphabetically: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/tools/src/hfst-info.cc new/hfst-3.15.2/tools/src/hfst-info.cc --- old/hfst-3.15.0/tools/src/hfst-info.cc 2017-03-30 15:31:41.000000000 +0200 +++ new/hfst-3.15.2/tools/src/hfst-info.cc 2019-04-04 17:33:27.000000000 +0200 @@ -252,6 +252,14 @@ "Required GLIB-based Unicode handling not present"); #endif } + else if ((*f == "icu") || (*f == "USE_ICU_UNICODE")) + { + verbose_printf("Requiring Unicode parsed by ICU"); +#ifndef USE_ICU_UNICODE + error(EXIT_FAILURE, 0, + "Required ICU-based Unicode handling not present"); +#endif + } else { error(EXIT_FAILURE, 0, "Required %s support is unrecognised " diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/tools/src/hfst-optimized-lookup.cc new/hfst-3.15.2/tools/src/hfst-optimized-lookup.cc --- old/hfst-3.15.0/tools/src/hfst-optimized-lookup.cc 2018-08-24 08:12:41.000000000 +0200 +++ new/hfst-3.15.2/tools/src/hfst-optimized-lookup.cc 2018-10-22 12:58:04.000000000 +0200 @@ -559,12 +559,6 @@ try { TransducerHeader header(f); TransducerAlphabet alphabet(f, header.symbol_count()); - - if (!feof(f)) - { - std::cerr << "!! Warning: file contains more than one transducer !!" << std::endl - << "!! This is currently not handled - using only the first one !!" << std::endl; - } if (header.probe_flag(Has_unweighted_input_epsilon_cycles) || header.probe_flag(Has_input_epsilon_cycles)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/tools/src/hfst-proc/Makefile.am new/hfst-3.15.2/tools/src/hfst-proc/Makefile.am --- old/hfst-3.15.0/tools/src/hfst-proc/Makefile.am 2018-08-28 13:29:30.000000000 +0200 +++ new/hfst-3.15.2/tools/src/hfst-proc/Makefile.am 2019-04-04 17:33:27.000000000 +0200 @@ -15,7 +15,7 @@ ## You should have received a copy of the GNU General Public License ## along with this program. If not, see <http://www.gnu.org/licenses/>. -AM_CPPFLAGS = -I${top_srcdir}/libhfst/src -I${top_srcdir}/lib -I${top_builddir}/lib $(GLIB_CPPFLAGS) +AM_CPPFLAGS = -I${top_srcdir}/libhfst/src -I${top_srcdir}/lib -I${top_builddir}/lib $(GLIB_CPPFLAGS) $(ICU_CPPFLAGS) if WANT_PROC MAYBE_PROC=hfst-apertium-proc @@ -27,7 +27,7 @@ bin_PROGRAMS=$(MAYBE_PROC) hfst_apertium_proc_SOURCES = hfst-proc.cc formatter.cc lookup-path.cc lookup-state.cc tokenizer.cc transducer.cc applicators.cc alphabet.cc -hfst_apertium_proc_LDADD = $(top_builddir)/libhfst/src/libhfst.la $(GLIB_LIBS) +hfst_apertium_proc_LDADD = $(top_builddir)/libhfst/src/libhfst.la $(GLIB_LIBS) $(ICU_LIBS) if WANT_PROC install-exec-hook: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/tools/src/hfst-proc/alphabet.cc new/hfst-3.15.2/tools/src/hfst-proc/alphabet.cc --- old/hfst-3.15.0/tools/src/hfst-proc/alphabet.cc 2018-08-23 13:26:39.000000000 +0200 +++ new/hfst-3.15.2/tools/src/hfst-proc/alphabet.cc 2019-04-04 17:33:27.000000000 +0200 @@ -16,6 +16,9 @@ #if USE_GLIB_UNICODE # include <glib.h> +#elif USE_ICU_UNICODE +# include <unicode/unistr.h> +# include <unicode/uchar.h> #endif #include <cstring> @@ -369,7 +372,26 @@ ProcTransducerAlphabet::caps_helper_single(const char* c, int& case_res) { #if USE_ICU_UNICODE -#error ICU unicode unimplemented + icu::UnicodeString us(c); + if (us.countChar32() == 1) { + UChar32 uc = us.char32At(0); + icu::UnicodeString cased_u = us; + std::string cased; + if (u_isupper(uc)) { + case_res = 1; + cased_u.toLower(); + cased_u.toUTF8String(cased); + return cased; + } + else if (u_islower(uc)) { + case_res = -1; + cased_u.toUpper(); + cased_u.toUTF8String(cased); + return cased; + } + } + case_res = 0; + return ""; #elif USE_GLIB_UNICODE glong readed = 0; glong written = 0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/tools/src/hfst-tagger/src/hfst_tagger_compute_data_statistics.py new/hfst-3.15.2/tools/src/hfst-tagger/src/hfst_tagger_compute_data_statistics.py --- old/hfst-3.15.0/tools/src/hfst-tagger/src/hfst_tagger_compute_data_statistics.py 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/tools/src/hfst-tagger/src/hfst_tagger_compute_data_statistics.py 2019-11-12 10:56:09.000000000 +0100 @@ -1,4 +1,4 @@ -#! /usr/bin/python +#! /usr/bin/python3 # @file hfst_tagger_compute_data_statistics.py # @@ -127,8 +127,8 @@ try: tagger_aux.check_line(line,2) except Exception as e: - print e.message + " " + str(line_number) + ":" - print line + print(e.message + " " + str(line_number) + ":") + print(line) exit(1) # Split the line into fields. @@ -179,70 +179,70 @@ tagger_aux.verbose_print("Storing lexical statistics.",verbose) tagger_aux.verbose_print("P(WORD_FORM | TAG)",verbose) -print "START P(WORD_FORM | TAG)" +print("START P(WORD_FORM | TAG)") tagger_aux.print_conditional_penalties(word_form_and_tag_map, entry_tag_map, "", False, False) -print "STOP P(WORD_FORM | TAG)" +print("STOP P(WORD_FORM | TAG)") # Compute and display the penalties for suffix and tag combinations. tagger_aux.verbose_print("P(LOWER_SUFFIX_AND_TAG | LOWER_SUFFIX)",verbose) -print "START P(LOWER_SUFFIX_AND_TAG | LOWER_SUFFIX)" +print("START P(LOWER_SUFFIX_AND_TAG | LOWER_SUFFIX)") tagger_aux.print_conditional_penalties(lower_suffix_and_tag_count_map, lower_suffix_count_map, "<lower_suffix_and_tag>", True, False) -print "STOP P(LOWER_SUFFIX_AND_TAG | LOWER_SUFFIX)" +print("STOP P(LOWER_SUFFIX_AND_TAG | LOWER_SUFFIX)") # Compute and display the penalties for suffixes. tagger_aux.verbose_print("P(LOWER_SUFFIX)",verbose) -print "START P(LOWER_SUFFIX)" +print("START P(LOWER_SUFFIX)") tagger_aux.print_penalties(lower_suffix_count_map, number_of_lower_suffixes, "<lower_suffix>") -print "STOP P(LOWER_SUFFIX)" +print("STOP P(LOWER_SUFFIX)") # Compute and display the penalties for tags. tagger_aux.verbose_print("P(LOWER_TAG)",verbose) -print "START P(LOWER_TAG)" +print("START P(LOWER_TAG)") number_of_lower_tags = number_of_lower_suffixes tagger_aux.print_penalties(lower_tag_count_map, number_of_lower_suffixes, "<lower_tag>") -print "STOP P(LOWER_TAG)" +print("STOP P(LOWER_TAG)") # Compute and display the penalties for suffix and tag combinations. -print "START P(UPPER_SUFFIX_AND_TAG | UPPER_SUFFIX)" +print("START P(UPPER_SUFFIX_AND_TAG | UPPER_SUFFIX)") tagger_aux.print_conditional_penalties(upper_suffix_and_tag_count_map, upper_suffix_count_map, "<upper_suffix_and_tag>", True, False) -print "STOP P(UPPER_SUFFIX_AND_TAG | UPPER_SUFFIX)" +print("STOP P(UPPER_SUFFIX_AND_TAG | UPPER_SUFFIX)") # Compute and display the penalties for suffixes. tagger_aux.verbose_print("P(UPPER_SUFFIX)",verbose) -print "START P(UPPER_SUFFIX)" +print("START P(UPPER_SUFFIX)") tagger_aux.print_penalties(upper_suffix_count_map, number_of_upper_suffixes, "<upper_suffix>") -print "STOP P(UPPER_SUFFIX)" +print("STOP P(UPPER_SUFFIX)") # Compute and display the penalties for tags. tagger_aux.verbose_print("P(UPPER_TAG)",verbose) -print "START P(UPPER_TAG)" +print("START P(UPPER_TAG)") number_of_tags = number_of_upper_suffixes tagger_aux.print_penalties(upper_tag_count_map, number_of_upper_suffixes, "<upper_tag>") -print "STOP P(UPPER_TAG)" +print("STOP P(UPPER_TAG)") ## CONSTRUCT TAG SEQUENCE TABLE. @@ -277,7 +277,7 @@ start_tag = "START " + model_order_tag + " " + statistics_patterns[i].name stop_tag = "STOP " + model_order_tag + " " + statistics_patterns[i].name - print start_tag + print(start_tag) tagger_aux.print_conditional_penalties(counters[i][0], counters[i][1], @@ -285,7 +285,7 @@ False, True) - print stop_tag + print(stop_tag) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/tools/src/hfst-twolc/src/Makefile.am new/hfst-3.15.2/tools/src/hfst-twolc/src/Makefile.am --- old/hfst-3.15.0/tools/src/hfst-twolc/src/Makefile.am 2018-08-22 13:45:20.000000000 +0200 +++ new/hfst-3.15.2/tools/src/hfst-twolc/src/Makefile.am 2019-04-04 17:33:27.000000000 +0200 @@ -9,7 +9,7 @@ endif AM_CPPFLAGS = -I${top_srcdir}/libhfst/src -I${top_srcdir}/libhfst/src/parsers -LDADD = $(top_builddir)/libhfst/src/libhfst.la +LDADD = $(top_builddir)/libhfst/src/libhfst.la $(ICU_LIBS) # the preprocessor scripts do not support options per se, so we whitelist # them here diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/hfst-3.15.0/tools/src/parsers/Makefile.am new/hfst-3.15.2/tools/src/parsers/Makefile.am --- old/hfst-3.15.0/tools/src/parsers/Makefile.am 2017-03-08 12:58:26.000000000 +0100 +++ new/hfst-3.15.2/tools/src/parsers/Makefile.am 2019-04-04 17:33:27.000000000 +0200 @@ -19,7 +19,7 @@ hfst_xfst_SOURCES = hfst-xfst.cc $(HFST_COMMON_SRC) -AM_CPPFLAGS = -I${top_srcdir}/libhfst/src -I${top_srcdir}/libhfst/src/parsers -I${top_srcdir}/tools/src $(GLIB_CPPFLAGS) -Wno-deprecated +AM_CPPFLAGS = -I${top_srcdir}/libhfst/src -I${top_srcdir}/libhfst/src/parsers -I${top_srcdir}/tools/src $(GLIB_CPPFLAGS) $(ICU_CPPFLAGS) -Wno-deprecated if WANT_XFST MAYBE_XFST=hfst-xfst @@ -30,9 +30,9 @@ bin_PROGRAMS=$(MAYBE_XFST) if WANT_READLINE - hfst_xfst_LDADD = $(top_builddir)/libhfst/src/libhfst.la $(GLIB_LIBS) -lreadline + hfst_xfst_LDADD = $(top_builddir)/libhfst/src/libhfst.la $(GLIB_LIBS) $(ICU_LIBS) -lreadline else - hfst_xfst_LDADD = $(top_builddir)/libhfst/src/libhfst.la $(GLIB_LIBS) + hfst_xfst_LDADD = $(top_builddir)/libhfst/src/libhfst.la $(GLIB_LIBS) $(ICU_LIBS) endif EXTRA_DIST=init_help.cc cmd.h abbrcmd.h
