Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package lttoolbox for openSUSE:Factory 
checked in at 2023-12-28 23:03:08
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/lttoolbox (Old)
 and      /work/SRC/openSUSE:Factory/.lttoolbox.new.28375 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "lttoolbox"

Thu Dec 28 23:03:08 2023 rev:6 rq:1135408 version:3.7.6

Changes:
--------
--- /work/SRC/openSUSE:Factory/lttoolbox/lttoolbox.changes      2022-11-01 
13:43:48.440291591 +0100
+++ /work/SRC/openSUSE:Factory/.lttoolbox.new.28375/lttoolbox.changes   
2023-12-28 23:04:51.581890035 +0100
@@ -1,0 +2,6 @@
+Thu Dec 28 02:43:38 UTC 2023 - Jan Engelhardt <jeng...@inai.de>
+
+- Update to release 3.7.6
+  * Add option to set compound_max_elements in lt-proc
+
+-------------------------------------------------------------------

Old:
----
  v3.7.1.tar.gz

New:
----
  v3.7.6.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ lttoolbox.spec ++++++
--- /var/tmp/diff_new_pack.JwFSkN/_old  2023-12-28 23:04:52.005905531 +0100
+++ /var/tmp/diff_new_pack.JwFSkN/_new  2023-12-28 23:04:52.005905531 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package lttoolbox
 #
-# Copyright (c) 2022 SUSE LLC
+# Copyright (c) 2023 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -19,7 +19,7 @@
 Name:           lttoolbox
 %define lname   liblttoolbox3
 Summary:        Toolbox for lexical processing and morphological analysis
-Version:        3.7.1
+Version:        3.7.6
 Release:        0
 License:        GPL-2.0-or-later
 Group:          Productivity/Scientific/Other

++++++ v3.7.1.tar.gz -> v3.7.6.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/CMakeLists.txt 
new/lttoolbox-3.7.6/CMakeLists.txt
--- old/lttoolbox-3.7.1/CMakeLists.txt  2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/CMakeLists.txt  1970-01-01 01:00:00.000000000 +0100
@@ -1,149 +0,0 @@
-cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
-cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
-project(lttoolbox
-       VERSION 3.7.0
-       LANGUAGES CXX C
-       )
-set(VERSION ${PROJECT_VERSION})
-set(VERSION_ABI 3)
-set(PACKAGE_BUGREPORT "apertium-st...@lists.sourceforge.net")
-
-add_definitions("-DPACKAGE_VERSION=\"${PROJECT_VERSION}\"")
-
-set(MASTER_PROJECT OFF)
-if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
-       set(MASTER_PROJECT ON)
-endif ()
-
-# Release or Debug
-if(MASTER_PROJECT AND NOT CMAKE_BUILD_TYPE)
-       set(CMAKE_BUILD_TYPE "Release")
-endif()
-
-set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-set(CMAKE_MACOSX_RPATH ON)
-
-include(GNUInstallDirs)
-
-option(BUILD_SHARED_LIBS "Set to OFF to use static library" ON)
-option(BUILD_TESTING "Set to OFF to disable tests" ON)
-option(ENABLE_PYTHON_BINDINGS "Set to ON to build the Python wrapper" OFF)
-
-if(MSVC)
-       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8 /std:c++latest 
/Zc:__cplusplus /permissive- /W4 /MP")
-       set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2")
-       set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} 
/LTCG")
-       set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS})
-       set(CMAKE_C_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
-else()
-       set(_FLAGS_COMMON "-Wall -Wextra -Wno-missing-field-initializers 
-Wno-deprecated -Wno-unused-parameter -fPIC")
-
-       include(CheckCCompilerFlag)
-       include(CheckCXXCompilerFlag)
-
-       foreach(flag "-Wno-unused-result" "-flto")
-               string(REGEX REPLACE "[^A-Za-z0-9]" "-" _flag ${flag})
-               CHECK_CXX_COMPILER_FLAG(${flag} COMPILER_SUPPORTS_${_flag})
-               if(COMPILER_SUPPORTS_${_flag})
-                       set(_FLAGS_COMMON "${_FLAGS_COMMON} ${flag}")
-               endif()
-       endforeach()
-       if(COMPILER_SUPPORTS_flto)
-               set(CMAKE_EXE_LINKER_FLAGS_RELEASE 
"${CMAKE_EXE_LINKER_FLAGS_RELEASE} -flto")
-       endif()
-
-       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_FLAGS_COMMON} 
-fvisibility-inlines-hidden")
-
-       # Enable latest possible C standard
-       foreach(flag "-std=c2x" "-std=c11" "-std=c1x" "-std=c99")
-               string(REGEX REPLACE "[^a-z0-9]" "-" _flag ${flag})
-               CHECK_C_COMPILER_FLAG(${flag} COMPILER_SUPPORTS_${_flag})
-               if(COMPILER_SUPPORTS_${_flag})
-                       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}")
-                       break()
-               endif()
-       endforeach()
-
-       # Require latest possible C++ standard
-       foreach(flag "-std=c++23" "-std=c++2b"  "-std=c++20" "-std=c++2a" 
"-std=c++17")
-               string(REGEX REPLACE "[^a-z0-9]" "-" _flag ${flag})
-               CHECK_CXX_COMPILER_FLAG(${flag} COMPILER_SUPPORTS_${_flag})
-               if(COMPILER_SUPPORTS_${_flag})
-                       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}")
-                       set(_ENABLED_CXX ${flag})
-                       break()
-               endif()
-       endforeach()
-       if(NOT _ENABLED_CXX)
-               message(FATAL_ERROR "Could not enable at least C++17 - upgrade 
your compiler")
-       endif()
-
-       # Generate pkg-config file
-       set(prefix      ${CMAKE_INSTALL_PREFIX})
-       set(exec_prefix "\${prefix}")
-       set(libdir      "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
-       set(includedir  "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
-       configure_file(lttoolbox.pc.in lttoolbox.pc @ONLY)
-       install(FILES "${CMAKE_CURRENT_BINARY_DIR}/lttoolbox.pc" DESTINATION 
"${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
-endif()
-
-try_compile(SIZET_NOT_CSTDINT 
${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp SOURCES 
${CMAKE_CURRENT_SOURCE_DIR}/lttoolbox/check-cstdint.cc)
-if(SIZET_NOT_CSTDINT)
-       add_definitions(-DSIZET_NOT_CSTDINT)
-endif()
-
-find_package(LibXml2 REQUIRED)
-include_directories(${LIBXML2_INCLUDE_DIR})
-
-if(WIN32)
-       add_definitions(-D_SECURE_SCL=0 -D_ITERATOR_DEBUG_LEVEL=0 
-D_CRT_SECURE_NO_DEPRECATE -DWIN32_LEAN_AND_MEAN -DVC_EXTRALEAN -DNOMINMAX)
-       add_definitions(-DSTDC_HEADERS -DREGEX_MALLOC)
-       include_directories("lttoolbox/win32")
-else()
-       add_definitions(-D_POSIX_C_SOURCE=200112 -D_GNU_SOURCE)
-endif()
-
-if(NOT APPLE)
-       find_package(Threads REQUIRED)
-endif()
-
-# Unlocked I/O functions
-include(CheckSymbolExists)
-set(CMAKE_REQUIRED_DEFINITIONS -D_POSIX_C_SOURCE=200112 -D_GNU_SOURCE)
-foreach(func fread_unlocked fwrite_unlocked fgetc_unlocked fputc_unlocked 
fputs_unlocked)
-       string(TOUPPER ${func} _uc)
-       CHECK_SYMBOL_EXISTS(${func} "stdio.h" HAVE_DECL_${_uc})
-       if(HAVE_DECL_${_uc})
-               add_definitions(-DHAVE_DECL_${_uc})
-       endif()
-endforeach()
-unset(CMAKE_REQUIRED_DEFINITIONS)
-
-# getopt
-find_path(GETOPT_INCLUDE getopt.h)
-include_directories(${GETOPT_INCLUDE})
-if(VCPKG_TOOLCHAIN)
-       find_library(GETOPT_LIB NAMES getopt)
-       add_definitions(-DHAVE_GETOPT_LONG)
-else()
-       set(GETOPT_LIB)
-endif()
-
-# ICU
-find_package(ICU COMPONENTS i18n io uc REQUIRED)
-
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-
-if(BUILD_TESTING)
-       enable_testing()
-       find_package(PythonInterp 3.5 REQUIRED)
-       set(ENV{CTEST_OUTPUT_ON_FAILURE} 1)
-       set(CMAKE_CTEST_ARGUMENTS "-VV")
-endif()
-
-add_subdirectory(lttoolbox)
-
-if(ENABLE_PYTHON_BINDINGS)
-       add_subdirectory(python)
-endif()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/cmake.sh new/lttoolbox-3.7.6/cmake.sh
--- old/lttoolbox-3.7.1/cmake.sh        2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/cmake.sh        1970-01-01 01:00:00.000000000 +0100
@@ -1,29 +0,0 @@
-#!/usr/bin/env bash
-set -e
-args=()
-
-while [[ $# > 0 ]];
-do
-       case "$1" in
-       --prefix)
-               args+=("-DCMAKE_INSTALL_PREFIX=$2")
-               shift 2
-               ;;
-       --prefix=*)
-               args+=("-DCMAKE_INSTALL_PREFIX=${1#*=}")
-               shift
-               ;;
-       *)
-               args+=("$1")
-               shift
-               ;;
-       esac
-done
-
-set -- "${args[@]}"
-
-echo "- rm -rf CMake caches"
-rm -rf install_manifest.txt CMakeCache.txt *.cmake CMakeFiles 
lttoolbox/CMakeFiles lttoolbox/*.cmake _CPack_Packages Testing
-echo "- cmake " "$@" "."
-cmake "$@" .
-echo "- You may now perform: make -j3"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/configure.ac 
new/lttoolbox-3.7.6/configure.ac
--- old/lttoolbox-3.7.1/configure.ac    2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/configure.ac    2023-12-27 21:15:14.000000000 +0100
@@ -2,7 +2,7 @@
 
 m4_define([PKG_VERSION_MAJOR], [3])
 m4_define([PKG_VERSION_MINOR], [7])
-m4_define([PKG_VERSION_PATCH], [1])
+m4_define([PKG_VERSION_PATCH], [6])
 
 # Bump if the ABI (not API) changed in a backwards-incompatible manner
 m4_define([PKG_VERSION_ABI], [3])
@@ -61,8 +61,7 @@
 # Checks for library functions.
 AC_FUNC_ERROR_AT_LINE
 
-AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, \
-fputc_unlocked, fputs_unlocked])
+AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, 
fputc_unlocked, fputs_unlocked, fmemopen])
 
 AC_CHECK_FUNCS([setlocale strdup getopt_long])
 
@@ -74,7 +73,7 @@
   version_flag="-std=c++${version}"
   AX_CHECK_COMPILE_FLAG([${version_flag}], [break], [version_flag=none])
 done
-AS_IF([test "$version_flag" == none], [
+AS_IF([test "$version_flag" = none], [
   AC_MSG_ERROR([Could not enable at least C++17 - upgrade your compiler])
 ])
 CXXFLAGS="$CXXFLAGS ${version_flag}"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/CMakeLists.txt 
new/lttoolbox-3.7.6/lttoolbox/CMakeLists.txt
--- old/lttoolbox-3.7.1/lttoolbox/CMakeLists.txt        2022-11-01 
09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/CMakeLists.txt        1970-01-01 
01:00:00.000000000 +0100
@@ -1,128 +0,0 @@
-set(LIBLTTOOLBOX_HEADERS
-       acx.h
-       alphabet.h
-       att_compiler.h
-       buffer.h
-       cli.h
-       compiler.h
-       compression.h
-       deserialiser.h
-       entry_token.h
-       exception.h
-       expander.h
-       file_utils.h
-       fst_processor.h
-       input_file.h
-       lt_locale.h
-       match_exe.h
-       match_node.h
-       match_state.h
-       my_stdio.h
-       node.h
-       pattern_list.h
-       regexp_compiler.h
-       serialiser.h
-       sorted_vector.h
-       sorted_vector.hpp
-       state.h
-       string_utils.h
-       tmx_compiler.h
-       transducer.h
-       trans_exe.h
-       ustring.h
-       xml_parse_util.h
-       xml_walk_util.h
-       )
-set(LIBLTTOOLBOX_SOURCES
-       acx.cc
-       alphabet.cc
-       att_compiler.cc
-       cli.cc
-       compiler.cc
-       compression.cc
-       entry_token.cc
-       expander.cc
-       file_utils.cc
-       fst_processor.cc
-       input_file.cc
-       lt_locale.cc
-       match_exe.cc
-       match_node.cc
-       match_state.cc
-       node.cc
-       pattern_list.cc
-       regexp_compiler.cc
-       sorted_vector.cc
-       state.cc
-       string_utils.cc
-       tmx_compiler.cc
-       transducer.cc
-       trans_exe.cc
-       ustring.cc
-       xml_parse_util.cc
-       xml_walk_util.cc
-       ${LIBLTTOOLBOX_HEADERS}
-       )
-if(WIN32)
-       set(LIBLTTOOLBOX_SOURCES
-               win32/libgen.c
-               win32/libgen.h
-               win32/regex.c
-               win32/regex.h
-               win32/unistd.h
-               ${LIBLTTOOLBOX_SOURCES}
-               )
-       if(NOT VCPKG_TOOLCHAIN)
-               set(LIBLTTOOLBOX_SOURCES
-                       win32/getopt.c
-                       win32/getopt.h
-                       ${LIBLTTOOLBOX_SOURCES}
-                       )
-       endif()
-else()
-       set(GETOPT)
-endif()
-
-add_library(lttoolbox ${LIBLTTOOLBOX_SOURCES})
-target_compile_definitions(lttoolbox PRIVATE LTTOOLBOX_EXPORTS)
-set_target_properties(lttoolbox PROPERTIES SOVERSION ${VERSION_ABI})
-target_link_libraries(lttoolbox ${LIBXML2_LIBRARIES} ${ICU_LIBRARIES} 
${CMAKE_THREAD_LIBS_INIT})
-
-add_executable(lt-print lt_print.cc)
-target_link_libraries(lt-print lttoolbox ${GETOPT_LIB})
-
-add_executable(lt-trim lt_trim.cc)
-target_link_libraries(lt-trim lttoolbox ${GETOPT_LIB})
-
-add_executable(lt-comp lt_comp.cc)
-target_link_libraries(lt-comp lttoolbox ${GETOPT_LIB})
-
-add_executable(lt-proc lt_proc.cc)
-target_link_libraries(lt-proc lttoolbox ${GETOPT_LIB})
-
-add_executable(lt-expand lt_expand.cc)
-target_link_libraries(lt-expand lttoolbox ${GETOPT_LIB})
-
-add_executable(lt-tmxcomp lt_tmxcomp.cc)
-target_link_libraries(lt-tmxcomp lttoolbox ${GETOPT_LIB})
-
-add_executable(lt-tmxproc lt_tmxproc.cc)
-target_link_libraries(lt-tmxproc lttoolbox ${GETOPT_LIB})
-
-if(BUILD_TESTING)
-       add_test(NAME tests COMMAND ${PYTHON_EXECUTABLE} 
"${CMAKE_SOURCE_DIR}/tests/run_tests.py" $<TARGET_FILE_DIR:lt-comp>)
-       set_tests_properties(tests PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
-endif()
-
-install(TARGETS lttoolbox
-       ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-install(FILES ${LIBLTTOOLBOX_HEADERS}
-       DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/lttoolbox)
-install(TARGETS lt-print lt-trim lt-comp lt-proc lt-expand lt-tmxcomp 
lt-tmxproc
-       RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-install(FILES dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd
-       DESTINATION ${CMAKE_INSTALL_DATADIR}/lttoolbox)
-
-install(FILES lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 
lt-print.1 lt-trim.1
-       DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/Makefile.am 
new/lttoolbox-3.7.6/lttoolbox/Makefile.am
--- old/lttoolbox-3.7.1/lttoolbox/Makefile.am   2022-11-01 09:36:47.000000000 
+0100
+++ new/lttoolbox-3.7.6/lttoolbox/Makefile.am   2023-12-27 21:15:14.000000000 
+0100
@@ -2,13 +2,13 @@
 h_sources = acx.h alphabet.h att_compiler.h buffer.h cli.h compiler.h 
compression.h  \
             deserialiser.h entry_token.h expander.h file_utils.h 
fst_processor.h input_file.h lt_locale.h \
             match_exe.h match_node.h match_state.h my_stdio.h node.h \
-            pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h 
state.h string_utils.h \
+            pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h 
state.h string_utils.h symbol_iter.h \
             transducer.h trans_exe.h xml_parse_util.h xml_walk_util.h 
exception.h tmx_compiler.h \
             ustring.h sorted_vector.hpp
 cc_sources = acx.cc alphabet.cc att_compiler.cc cli.cc compiler.cc 
compression.cc entry_token.cc \
              expander.cc file_utils.cc fst_processor.cc input_file.cc 
lt_locale.cc match_exe.cc \
              match_node.cc match_state.cc node.cc pattern_list.cc \
-             regexp_compiler.cc sorted_vector.cc state.cc string_utils.cc 
transducer.cc \
+             regexp_compiler.cc sorted_vector.cc state.cc string_utils.cc 
symbol_iter.cc transducer.cc \
              trans_exe.cc xml_parse_util.cc xml_walk_util.cc tmx_compiler.cc 
ustring.cc
 
 library_includedir = $(includedir)/$(PACKAGE_NAME)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/acx.h 
new/lttoolbox-3.7.6/lttoolbox/acx.h
--- old/lttoolbox-3.7.1/lttoolbox/acx.h 2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/acx.h 2023-12-27 21:15:14.000000000 +0100
@@ -18,6 +18,7 @@
 #define _ACXPARSEUTIL_
 
 #include <lttoolbox/sorted_vector.hpp>
+#include <cstdint>
 #include <map>
 
 std::map<int32_t, sorted_vector<int32_t>> readACX(const char* file);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/alphabet.cc 
new/lttoolbox-3.7.6/lttoolbox/alphabet.cc
--- old/lttoolbox-3.7.1/lttoolbox/alphabet.cc   2022-11-01 09:36:47.000000000 
+0100
+++ new/lttoolbox-3.7.6/lttoolbox/alphabet.cc   2023-12-27 21:15:14.000000000 
+0100
@@ -19,6 +19,7 @@
 #include <lttoolbox/my_stdio.h>
 #include <lttoolbox/serialiser.h>
 #include <lttoolbox/deserialiser.h>
+#include <lttoolbox/symbol_iter.h>
 
 #include <cctype>
 #include <cstdlib>
@@ -311,24 +312,9 @@
 Alphabet::tokenize(UStringView str) const
 {
   std::vector<int32_t> ret;
-  size_t end = str.size();
-  size_t i = 0;
-  UChar32 c;
-  while (i < end) {
-    U16_NEXT(str.data(), i, end, c);
-    if (c == '\\') {
-    } else if (c == '<') {
-      size_t j = i;
-      while (c != '>' && j < end) {
-        U16_NEXT(str.data(), j, end, c);
-      }
-      if (c == '>') {
-        ret.push_back(operator()(str.substr(i-1, j-i+1)));
-        i = j;
-      }
-    } else {
-      ret.push_back(static_cast<int32_t>(c));
-    }
+  for (auto sym : symbol_iter(str)) {
+    if (sym.size() > 1) ret.push_back(operator()(sym));
+    else ret.push_back(static_cast<int32_t>(sym[0]));
   }
   return ret;
 }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/cli.cc 
new/lttoolbox-3.7.6/lttoolbox/cli.cc
--- old/lttoolbox-3.7.1/lttoolbox/cli.cc        2022-11-01 09:36:47.000000000 
+0100
+++ new/lttoolbox-3.7.6/lttoolbox/cli.cc        2023-12-27 21:15:14.000000000 
+0100
@@ -63,15 +63,15 @@
   epilog = e;
 }
 
-void CLI::print_usage()
+void CLI::print_usage(std::ostream& out)
 {
   if (!prog_name.empty()) {
-    std::cout << prog_name;
+    out << prog_name;
     if (!version.empty()) {
-      std::cout << " v" << version;
+      out << " v" << version;
     }
-    std::cout << ": " << description << std::endl;
-    std::cout << "USAGE: " << prog_name;
+    out << ": " << description << std::endl;
+    out << "USAGE: " << prog_name;
     std::string bargs;
     std::string sargs;
     for (auto& it : options) {
@@ -86,34 +86,34 @@
       }
     }
     if (!bargs.empty()) {
-      std::cout << " [-" << bargs << "]";
+      out << " [-" << bargs << "]";
     }
-    std::cout << sargs;
+    out << sargs;
     int depth = 0;
     for (auto& it : file_args) {
-      std::cout << ' ';
+      out << ' ';
       if (it.second) {
-        std::cout << '[';
+        out << '[';
         depth += 1;
       }
-      std::cout << it.first;
+      out << it.first;
     }
-    while (depth-- > 0) std::cout << "]";
-    std::cout << std::endl;
+    while (depth-- > 0) out << "]";
+    out << std::endl;
     for (auto& it : options) {
-      std::cout << "  -" << it.short_opt;
+      out << "  -" << it.short_opt;
 #if HAVE_GETOPT_LONG
-      std::cout << ", --" << it.long_opt << ':';
+      out << ", --" << it.long_opt << ':';
       for (size_t i = it.long_opt.size(); i < 20; i++) {
-        std::cout << ' ';
+        out << ' ';
       }
 #else
-      std::cout << ":    ";
+      out << ":    ";
 #endif
-      std::cout << it.desc << std::endl;
+      out << it.desc << std::endl;
     }
     if (!epilog.empty()) {
-      std::cout << epilog << std::endl;
+      out << epilog << std::endl;
     }
   }
   exit(EXIT_FAILURE);
@@ -162,8 +162,11 @@
         break;
       }
     }
-    if (!found || cnt == 'h') {
-      print_usage();
+    if (!found) {
+      print_usage(std::cerr);
+    }
+    else if (cnt == 'h') {
+      print_usage(std::cout);
     }
   }
   while (optind < argc) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/cli.h 
new/lttoolbox-3.7.6/lttoolbox/cli.h
--- old/lttoolbox-3.7.1/lttoolbox/cli.h 2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/cli.h 2023-12-27 21:15:14.000000000 +0100
@@ -18,6 +18,7 @@
 #include <string>
 #include <vector>
 #include <map>
+#include <iostream>
 
 class CLI {
 private:
@@ -52,7 +53,7 @@
   void add_bool_arg(char short_flag, std::string long_flag, std::string desc);
   void add_file_arg(std::string name, bool optional = true);
   void set_epilog(std::string e);
-  void print_usage();
+  void print_usage(std::ostream& out = std::cerr);
   void parse_args(int argc, char* argv[]);
   std::map<std::string, std::vector<std::string>>& get_strs();
   std::map<std::string, bool>& get_bools();
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/fst_processor.cc 
new/lttoolbox-3.7.6/lttoolbox/fst_processor.cc
--- old/lttoolbox-3.7.1/lttoolbox/fst_processor.cc      2022-11-01 
09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/fst_processor.cc      2023-12-27 
21:15:14.000000000 +0100
@@ -20,6 +20,7 @@
 #include <lttoolbox/xml_parse_util.h>
 #include <lttoolbox/file_utils.h>
 #include <lttoolbox/string_utils.h>
+#include <lttoolbox/symbol_iter.h>
 
 #include <iostream>
 #include <cerrno>
@@ -1806,10 +1807,38 @@
   }
 }
 
+// Runs `word` through the transducer: `result` gets the output of the last final state reached
+// ('^'/'=' prepended per delim/mark), `queue` the symbols read after the state died; true iff matched.
+bool
+FSTProcessor::step_biltrans(UStringView word, UString& result, UString& queue,
+                            bool delim, bool mark)
+{
+  State current_state = initial_state;
+  bool firstupper = word.size() > 0 && u_isupper(word[0]);
+  bool uppercase = firstupper && word.size() > 1 && u_isupper(word[1]);
+  for (auto symbol : symbol_iter(word)) {
+    int32_t val = (symbol.size() == 1 ? symbol[0] : alphabet(symbol));
+    // Case-aware step, as in the other refactored loop; plain step(val, x) takes x as an alternative symbol.
+    if (current_state.size() != 0)
+      current_state.step_case(val, beCaseSensitive(current_state));
+    if (current_state.isFinal(all_finals)) {
+      result.clear();
+      if (delim) result += '^';
+      if (mark) result += '=';
+      result += current_state.filterFinals(all_finals, alphabet, escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
+    }
+    if (current_state.size() != 0) continue;
+    if (result.empty()) return false;
+    queue.append(symbol);
+  }
+  return !result.empty();
+}
+
 UString
 FSTProcessor::biltransfull(UStringView input_word, bool with_delim)
 {
-  State current_state = initial_state;
   UString result;
   unsigned int start_point = 1;
   unsigned int end_point = input_word.size()-2;
@@ -1833,83 +1862,11 @@
     mark = true;
   }
 
-  bool firstupper = u_isupper(input_word[start_point]);
-  bool uppercase = firstupper && u_isupper(input_word[start_point+1]);
-
-  for(unsigned int i = start_point; i <= end_point; i++)
-  {
-    int val;
-    UString symbol;
-
-    if(input_word[i] == '\\')
-    {
-      i++;
-      val = static_cast<int32_t>(input_word[i]);
-    }
-    else if(input_word[i] == '<')
-    {
-      symbol = '<';
-      for(unsigned int j = i + 1; j <= end_point; j++)
-      {
-        symbol += input_word[j];
-        if(input_word[j] == '>')
-        {
-          i = j;
-          break;
-        }
-      }
-      val = alphabet(symbol);
-    }
-    else
-    {
-      val = static_cast<int32_t>(input_word[i]);
-    }
-    if(current_state.size() != 0)
-    {
-      if(!alphabet.isTag(val) && u_isupper(val) && 
!beCaseSensitive(current_state))
-      {
-        current_state.step(val, u_tolower(val));
-      }
-      else
-      {
-        current_state.step(val);
-      }
-    }
-    if(current_state.isFinal(all_finals))
-    {
-      result.clear();
-      if(with_delim) {
-        result += '^';
-      }
-      if(mark) {
-        result += '=';
-      }
-      result += current_state.filterFinals(all_finals, alphabet,
-                                           escaped_chars,
-                                           displayWeightsMode, maxAnalyses, 
maxWeightClasses,
-                                           uppercase, firstupper, 0).substr(1);
-    }
-
-    if(current_state.size() == 0)
-    {
-      if(!symbol.empty() && !result.empty())
-      {
-        queue.append(symbol);
-      }
-      else
-      {
-        // word is not present
-        if(with_delim)
-        {
-          result = "^@"_u + US(input_word.substr(1));
-        }
-        else
-        {
-          result = "@"_u + US(input_word);
-        }
-        return result;
-      }
-    }
+  auto word = input_word.substr(start_point, end_point-start_point);
+  bool exists = step_biltrans(word, result, queue, with_delim, mark);
+  if (!exists) {
+    if (with_delim) return "^@"_u + US(input_word.substr(1));
+    else return "@"_u + US(input_word);
   }
 
   if(start_point < (end_point - 3))
@@ -1920,27 +1877,7 @@
 
   if(!queue.empty())
   {
-    UString result_with_queue;
-    for(unsigned int i = 0, limit = result.size(); i != limit; i++)
-    {
-      switch(result[i])
-      {
-        case '\\':
-          result_with_queue += '\\';
-          i++;
-          break;
-
-        case '/':
-          result_with_queue.append(queue);
-          break;
-
-        default:
-          break;
-      }
-      result_with_queue += result[i];
-    }
-    result_with_queue.append(queue);
-
+    UString result_with_queue = compose(result, queue);
     if(with_delim)
     {
       result_with_queue += '$';
@@ -1986,110 +1923,18 @@
     mark = true;
   }
 
-  bool firstupper = u_isupper(input_word[start_point]);
-  bool uppercase = firstupper && u_isupper(input_word[start_point+1]);
-
-  for(unsigned int i = start_point; i <= end_point; i++)
-  {
-    int val;
-    UString symbol;
-
-    if(input_word[i] == '\\')
-    {
-      i++;
-      val = static_cast<int32_t>(input_word[i]);
-    }
-    else if(input_word[i] == '<')
-    {
-      symbol = '<';
-      for(unsigned int j = i + 1; j <= end_point; j++)
-      {
-        symbol += input_word[j];
-        if(input_word[j] == '>')
-        {
-          i = j;
-          break;
-        }
-      }
-      val = alphabet(symbol);
-    }
-    else
-    {
-      val = static_cast<int32_t>(input_word[i]);
-    }
-    if(current_state.size() != 0)
-    {
-      if(!alphabet.isTag(val) && u_isupper(val) && 
!beCaseSensitive(current_state))
-      {
-        current_state.step(val, u_tolower(val));
-      }
-      else
-      {
-        current_state.step(val);
-      }
-    }
-    if(current_state.isFinal(all_finals))
-    {
-      result.clear();
-      if (with_delim) {
-        result += '^';
-      }
-      if (mark) {
-        result += '=';
-      }
-      result += current_state.filterFinals(all_finals, alphabet,
-                                           escaped_chars,
-                                           displayWeightsMode, maxAnalyses, 
maxWeightClasses,
-                                           uppercase, firstupper, 0).substr(1);
-    }
-
-    if(current_state.size() == 0)
-    {
-      if(!symbol.empty() && !result.empty())
-      {
-        queue.append(symbol);
-      }
-      else
-      {
-        // word is not present
-        if(with_delim)
-        {
-          result = "^@"_u + US(input_word.substr(1));
-        }
-        else
-        {
-          result = "@"_u + US(input_word);
-        }
-        return result;
-      }
-    }
+  UStringView word = input_word.substr(start_point, end_point-start_point);
+  bool exists = step_biltrans(word, result, queue, with_delim, mark);
+  if (!exists) {
+    if (with_delim) return "^@"_u + US(input_word.substr(1));
+    else return "@"_u + US(input_word);
   }
 
   // attach unmatched queue automatically
 
   if(!queue.empty())
   {
-    UString result_with_queue;
-    for(unsigned int i = 0, limit = result.size(); i != limit; i++)
-    {
-      switch(result[i])
-      {
-        case '\\':
-          result_with_queue += '\\';
-          i++;
-          break;
-
-        case '/':
-          result_with_queue.append(queue);
-          break;
-
-        default:
-          break;
-      }
-      result_with_queue += result[i];
-    }
-    result_with_queue.append(queue);
-
+    UString result_with_queue = compose(result, queue);
     if(with_delim)
     {
       result_with_queue += '$';
@@ -2345,45 +2190,18 @@
   bool firstupper = u_isupper(input_word[start_point]);
   bool uppercase = firstupper && u_isupper(input_word[start_point+1]);
 
-  for(unsigned int i = start_point; i <= end_point; i++)
-  {
-    int val = 0;
-    UString symbol;
-
-    if(input_word[i] == '\\')
-    {
-      i++;
-      val = input_word[i];
-    }
-    else if(input_word[i] == '<')
-    {
-      seentags = true;
-      symbol = '<';
-      for(unsigned int j = i + 1; j <= end_point; j++)
-      {
-        symbol += input_word[j];
-        if(input_word[j] == '>')
-        {
-          i = j;
-          break;
-        }
-      }
+  UStringView word = input_word.substr(start_point, end_point-start_point);
+  for (auto symbol : symbol_iter(word)) {
+    int32_t val;
+    if (symbol.size() == 1) {
+      val = symbol[0];
+    } else {
       val = alphabet(symbol);
-    }
-    else
-    {
-      val = input_word[i];
+      seentags = true;
     }
     if(current_state.size() != 0)
     {
-      if(!alphabet.isTag(val) && u_isupper(val) && 
!beCaseSensitive(current_state))
-      {
-        current_state.step(val, u_tolower(val));
-      }
-      else
-      {
-        current_state.step(val);
-      }
+      current_state.step_case(val, beCaseSensitive(current_state));
     }
     if(current_state.isFinal(all_finals))
     {
@@ -2445,27 +2263,7 @@
 
   if(!queue.empty())
   {
-    UString result_with_queue;
-    for(unsigned int i = 0, limit = result.size(); i != limit; i++)
-    {
-      switch(result[i])
-      {
-        case '\\':
-          result_with_queue += '\\';
-          i++;
-          break;
-
-        case '/':
-          result_with_queue.append(queue);
-          break;
-
-        default:
-          break;
-      }
-      result_with_queue += result[i];
-    }
-    result_with_queue.append(queue);
-
+    UString result_with_queue = compose(result, queue);
     if(with_delim)
     {
       result_with_queue += '$';
@@ -2508,79 +2306,12 @@
     mark = true;
   }
 
-  bool firstupper = u_isupper(input_word[start_point]);
-  bool uppercase = firstupper && u_isupper(input_word[start_point+1]);
-
-  for(unsigned int i = start_point; i <= end_point; i++)
-  {
-    int val;
-    UString symbol;
-
-    if(input_word[i] == '\\')
-    {
-      i++;
-      val = static_cast<int32_t>(input_word[i]);
-    }
-    else if(input_word[i] == '<')
-    {
-      symbol = '<';
-      for(unsigned int j = i + 1; j <= end_point; j++)
-      {
-        symbol += input_word[j];
-        if(input_word[j] == '>')
-        {
-          i = j;
-          break;
-        }
-      }
-      val = alphabet(symbol);
-    }
-    else
-    {
-      val = static_cast<int32_t>(input_word[i]);
-    }
-    if(current_state.size() != 0)
-    {
-      if(!alphabet.isTag(val) && u_isupper(val) && 
!beCaseSensitive(current_state))
-      {
-        current_state.step(val, u_tolower(val));
-      }
-      else
-      {
-        current_state.step(val);
-      }
-    }
-    if(current_state.isFinal(all_finals))
-    {
-      result.clear();
-      if (with_delim) {
-        result += '^';
-      }
-      if (mark) {
-        result += '=';
-      }
-      result += current_state.filterFinals(all_finals, alphabet,
-                                           escaped_chars,
-                                           displayWeightsMode, maxAnalyses, 
maxWeightClasses,
-                                           uppercase, firstupper, 0).substr(1);
-    }
-
-    if(current_state.size() == 0)
-    {
-      if(symbol.empty())
-      {
-        // word is not present
-        if(with_delim)
-        {
-          result = "^@"_u + US(input_word.substr(1));
-        }
-        else
-        {
-          result = "@"_u + US(input_word);
-        }
-        return result;
-      }
-    }
+  auto word = input_word.substr(start_point, end_point-start_point);
+  UString queue;
+  bool exists = step_biltrans(word, result, queue, with_delim, mark);
+  if (!exists || !queue.empty()) {
+    if (with_delim) return "^@"_u + US(input_word.substr(1));
+    else return "@"_u + US(input_word);
   }
 
   if(with_delim)
@@ -2896,6 +2627,12 @@
   maxWeightClasses = value;
 }
 
+void
+FSTProcessor::setCompoundMaxElements(int value)
+{
+  compound_max_elements = value;
+}
+
 bool
 FSTProcessor::getDecompoundingMode()
 {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/fst_processor.h new/lttoolbox-3.7.6/lttoolbox/fst_processor.h
--- old/lttoolbox-3.7.1/lttoolbox/fst_processor.h       2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/fst_processor.h       2023-12-27 21:15:14.000000000 +0100
@@ -430,6 +430,8 @@
   void generation_wrapper_null_flush(InputFile& input, UFILE *output,
                                      GenerationMode mode);
   UString compose(UStringView lexforms, UStringView queue) const;
+  bool step_biltrans(UStringView word, UString& result, UString& queue,
+                     bool delim, bool mark);
 
   void procNodeICX();
   void procNodeRCX();
@@ -440,6 +442,7 @@
   xmlTextReaderPtr reader;
 
   static constexpr size_t max_case_insensitive_state_size = 65536;
+  bool max_case_insensitive_state_size_warned = false;
   /*
    * Including lowercased versions for every character can potentially create very large states
    * (See https://github.com/apertium/lttoolbox/issues/167 ). As a sanity-check we don't do
@@ -448,7 +451,20 @@
    * @return running with --case-sensitive or state size exceeds max
    */
   bool beCaseSensitive(const State& state) {
-    return caseSensitive || state.size() >= max_case_insensitive_state_size;
+    if(caseSensitive) {
+      return true;
+    }
+    else if(state.size() < max_case_insensitive_state_size)  {
+      return false;             // ie. do case-folding
+    }
+    else {
+      if(!max_case_insensitive_state_size_warned) {
+        max_case_insensitive_state_size_warned = true; // only warn once
+        UFILE* err_out = u_finit(stderr, NULL, NULL);
+        u_fprintf(err_out, "Warning: matching case-sensitively since processor state size >= %d\n", max_case_insensitive_state_size);
+      }
+      return true;
+    }
   }
 
 public:
@@ -505,6 +521,7 @@
   void setDisplayWeightsMode(bool value);
   void setMaxAnalysesValue(int value);
   void setMaxWeightClassesValue(int value);
+  void setCompoundMaxElements(int value);
   bool getNullFlush();
   bool getDecompoundingMode();
 };
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/input_file.cc new/lttoolbox-3.7.6/lttoolbox/input_file.cc
--- old/lttoolbox-3.7.1/lttoolbox/input_file.cc 2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/input_file.cc 2023-12-27 21:15:14.000000000 +0100
@@ -44,6 +44,16 @@
   return (infile != nullptr);
 }
 
+#if HAVE_DECL_FMEMOPEN
+bool
+InputFile::open_in_memory(char *input_buffer)
+{
+  close();
+  infile = fmemopen(input_buffer, strlen(input_buffer), "rb");
+  return (infile != nullptr);
+}
+#endif
+
 void
 InputFile::open_or_exit(const char* fname)
 {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/input_file.h new/lttoolbox-3.7.6/lttoolbox/input_file.h
--- old/lttoolbox-3.7.1/lttoolbox/input_file.h  2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/input_file.h  2023-12-27 21:15:14.000000000 +0100
@@ -34,6 +34,9 @@
   InputFile();
   ~InputFile();
   bool open(const char* fname = nullptr);
+#if HAVE_DECL_FMEMOPEN
+  bool open_in_memory(char* input_buffer);
+#endif
   void open_or_exit(const char* fname = nullptr);
   void close();
   void wrap(FILE* newinfile);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/lt_comp.cc new/lttoolbox-3.7.6/lttoolbox/lt_comp.cc
--- old/lttoolbox-3.7.1/lttoolbox/lt_comp.cc    2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/lt_comp.cc    2023-12-27 21:15:14.000000000 +0100
@@ -125,7 +125,7 @@
   if(opc == "lr")
   {
     if (have_vl) {
-      std::cout << "Error: -l specified, but mode is lr" << std::endl;
+      std::cerr << "Error: -l specified, but mode is lr" << std::endl;
       cli.print_usage();
     }
     if(ttype == 'a')
@@ -144,7 +144,7 @@
   else if(opc == "rl")
   {
     if (have_vr) {
-      std::cout << "Error: -r specified, but mode is rl" << std::endl;
+      std::cerr << "Error: -r specified, but mode is rl" << std::endl;
       cli.print_usage();
     }
     if(ttype == 'a')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/lt_paradigm.cc new/lttoolbox-3.7.6/lttoolbox/lt_paradigm.cc
--- old/lttoolbox-3.7.1/lttoolbox/lt_paradigm.cc        2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/lt_paradigm.cc        2023-12-27 21:15:14.000000000 +0100
@@ -21,6 +21,8 @@
 #include <lttoolbox/state.h>
 #include <lttoolbox/trans_exe.h>
 #include <lttoolbox/cli.h>
+#include <lttoolbox/symbol_iter.h>
+#include <lttoolbox/string_utils.h>
 
 #include <queue>
 
@@ -55,17 +57,33 @@
   }
 }
 
+sorted_vector<int32_t> split_tag(UStringView sym, Alphabet& alpha, int prefix,
+                                 UChar32 sep)
+{
+  sorted_vector<int32_t> ret;
+  auto names = StringUtils::split_escaped(sym.substr(prefix+1, sym.size()-prefix-2), sep);
+  for (auto& tg : names) {
+    UString tag;
+    tag += '<';
+    tag += tg;
+    tag += '>';
+    ret.insert(alpha(tag));
+  }
+  return ret;
+}
+
 void process(UStringView pattern, std::map<UString, Transducer>& trans,
              Alphabet& alpha,
-             const std::set<UChar32>& letters, const std::set<int32_t>& tags,
+             const std::set<UChar32>& letters,
+             const sorted_vector<int32_t>& tags,
              UFILE* output, bool sort)
 {
   int32_t any_char = static_cast<int32_t>('*');
   int32_t any_tag = alpha(u"<*>");
-  std::vector<int32_t> pat = alpha.tokenize(pattern);
   Transducer other;
   int state = other.getInitial();
-  for (auto& it : pat) {
+  for (auto sym : symbol_iter(pattern)) {
+    int32_t it = (sym.size() == 1 ? sym[0] : alpha(sym));
     if (it == any_char) {
       state = other.insertNewSingleTransduction(0, state);
       for (auto& sym : letters) {
@@ -76,6 +94,30 @@
       for (auto& sym : tags) {
         other.linkStates(state, state, alpha(sym, sym));
       }
+    } else if (it == 0 && StringUtils::startswith(sym, "<*|"_u)) {
+      auto or_tags = split_tag(sym, alpha, 2, '|');
+      state = other.insertNewSingleTransduction(0, state);
+      for (auto& t : or_tags) {
+        other.linkStates(state, state, alpha(t, t));
+      }
+    } else if (it == 0 && StringUtils::startswith(sym, "<*"_u)) {
+      auto del_tags = split_tag(sym, alpha, 1, '-');
+      state = other.insertNewSingleTransduction(0, state);
+      for (auto& t : tags) {
+        if (del_tags.find(t) == del_tags.end()) {
+          other.linkStates(state, state, alpha(t, t));
+        }
+      }
+    } else if (it == 0 && StringUtils::startswith(sym, "<|"_u)) {
+      auto or_tags = split_tag(sym, alpha, 1, '|');
+      auto old_state = state;
+      for (auto& t : or_tags) {
+        if (old_state == state) {
+          state = other.insertNewSingleTransduction(alpha(t, t), state);
+        } else {
+          other.linkStates(old_state, state, alpha(t, t));
+        }
+      }
     } else {
       state = other.insertNewSingleTransduction(alpha(it, it), state);
     }
@@ -128,7 +170,7 @@
   fclose(fst);
 
   alpha.includeSymbol(u"<*>");
-  std::set<int32_t> tags;
+  sorted_vector<int32_t> tags;
   for (int32_t i = 1; i <= alpha.size(); i++) {
     if (!skip_tags.empty()) {
       UString t;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/lt_proc.cc new/lttoolbox-3.7.6/lttoolbox/lt_proc.cc
--- old/lttoolbox-3.7.1/lttoolbox/lt_proc.cc    2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/lt_proc.cc    2023-12-27 21:15:14.000000000 +0100
@@ -59,6 +59,7 @@
   cli.add_bool_arg('W', "show-weights", "Print final analysis weights (if any)");
   cli.add_str_arg('N', "analyses", "Output no more than N analyses (if the transducer is weighted, the N best analyses)", "N");
   cli.add_str_arg('L', "weight-classes", "Output no more than N best weight classes (where analyses with equal weight constitute a class)", "N");
+  cli.add_str_arg('M', "compound-max-elements", "Set compound max elements", "N");
   cli.add_bool_arg('h', "help", "show this help");
   cli.parse_args(argc, argv);
 
@@ -157,6 +158,14 @@
     }
     fstp.setMaxWeightClassesValue(n);
   }
+  if (strs.find("compound-max-elements") != strs.end()) { // Test
+    int n = atoi(strs["compound-max-elements"].back().c_str());
+    if (n < 1) {
+      std::cerr << "Invalid or no argument for compound max elements" << std::endl;
+      exit(EXIT_FAILURE);
+    }
+    fstp.setCompoundMaxElements(n);
+  }
 
   FILE* in = openInBinFile(cli.get_files()[0]);
   fstp.load(in);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/sorted_vector.hpp new/lttoolbox-3.7.6/lttoolbox/sorted_vector.hpp
--- old/lttoolbox-3.7.1/lttoolbox/sorted_vector.hpp     2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/sorted_vector.hpp     2023-12-27 21:15:14.000000000 +0100
@@ -22,6 +22,7 @@
 #include <vector>
 #include <algorithm>
 #include <functional>
+#include <iterator>
 
 namespace detail {
        template<typename ForwardIt, typename Comp>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/state.cc new/lttoolbox-3.7.6/lttoolbox/state.cc
--- old/lttoolbox-3.7.1/lttoolbox/state.cc      2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/state.cc      2023-12-27 21:15:14.000000000 +0100
@@ -431,6 +431,19 @@
   }
 }
 
+void
+State::step_optional(UChar32 val)
+{
+  if (val == 0) return;
+  std::vector<TNodeState> new_state;
+  for (size_t i = 0; i < state.size(); i++) {
+    apply_into(&new_state, val, i, false);
+  }
+  new_state.swap(state);
+  epsilonClosure();
+  new_state.swap(state);
+  state.insert(state.end(), new_state.begin(), new_state.end());
+}
 
 bool
 State::isFinal(std::map<Node *, double> const &finals) const
@@ -946,3 +959,14 @@
   retval += ']';
   return retval;
 }
+
+void
+State::merge(const State& other)
+{
+  for (auto& it : other.state) {
+    std::vector<std::pair<int, double>>* tmp = new std::vector<std::pair<int, double>>();
+    *tmp = *(it.sequence);
+    TNodeState ns(it.where, tmp, it.dirty);
+    this->state.push_back(std::move(ns));
+  }
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/state.h new/lttoolbox-3.7.6/lttoolbox/state.h
--- old/lttoolbox-3.7.1/lttoolbox/state.h       2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/state.h       2023-12-27 21:15:14.000000000 +0100
@@ -201,6 +201,10 @@
 
   void step_case_override(const int val, const bool caseSensitive);
 
+  void step_optional(UChar32 val);
+
+  void closure(const sorted_vector<int32_t>& symbols);
+
   /**
    * Init the state with the initial node and empty output
    * @param initial the initial node of the transducer
@@ -223,6 +227,12 @@
   void pruneStatesWithForbiddenSymbol(int forbiddenSymbol);
 
   /**
+   * Remove states not containing a particular symbol
+   * @param symbol the symbol that is required
+   */
+  void requireSymbol(int32_t symbol);
+
+  /**
     * Whether any of the analyses contains a certain symbol
     * @param requiredSymbol the symbol we're looking for
     */
@@ -343,6 +353,11 @@
                          std::queue<UString> &blanks,
                          std::vector<UString> &numbers) const;
 
+  /**
+   * Add all paths in other to self
+   */
+  void merge(const State& other);
+
 };
 
 #endif
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/string_utils.cc new/lttoolbox-3.7.6/lttoolbox/string_utils.cc
--- old/lttoolbox-3.7.1/lttoolbox/string_utils.cc       2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/string_utils.cc       2023-12-27 21:15:14.000000000 +0100
@@ -67,6 +67,29 @@
   return result;
 }
 
+std::vector<UString>
+StringUtils::split_escaped(UStringView str, UChar delim)
+{
+  std::vector<UString> result;
+  size_t start = 0;
+  for (size_t i = 0; i < str.size(); i++) {
+    if (str[i] == '\\') {
+      i++;
+      continue;
+    }
+    if (str[i] == delim) {
+      if (i > start) {
+        result.push_back(US(str.substr(start, i-start)));
+      }
+      start = i+1;
+    }
+  }
+  if (start < str.size()) {
+    result.push_back(US(str.substr(start)));
+  }
+  return result;
+}
+
 UString
 StringUtils::join(const std::vector<UString>& vec, UStringView delim)
 {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/string_utils.h new/lttoolbox-3.7.6/lttoolbox/string_utils.h
--- old/lttoolbox-3.7.1/lttoolbox/string_utils.h        2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/string_utils.h        2023-12-27 21:15:14.000000000 +0100
@@ -12,6 +12,9 @@
   // split string on delimiter
   static std::vector<UString> split(UStringView str, UStringView delim=u" ");
 
+  // split but respect \ escapes
+  static std::vector<UString> split_escaped(UStringView str, UChar delim);
+
   // inverse of split
   static UString join(const std::vector<UString>& vec, UStringView delim);
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/symbol_iter.cc new/lttoolbox-3.7.6/lttoolbox/symbol_iter.cc
--- old/lttoolbox-3.7.1/lttoolbox/symbol_iter.cc        1970-01-01 01:00:00.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/symbol_iter.cc        2023-12-27 21:15:14.000000000 +0100
@@ -0,0 +1,58 @@
+#include <lttoolbox/symbol_iter.h>
+#include <unicode/uchar.h>
+
+symbol_iter::iterator::iterator(UStringView s) : str(s)
+{
+  ++*this;
+}
+
+symbol_iter::iterator::iterator(const symbol_iter::iterator& other)
+  : str(other.str), sloc(other.sloc), eloc(other.eloc) {}
+
+symbol_iter::iterator::~iterator() {}
+
+UStringView symbol_iter::iterator::operator*() const {
+  return str.substr(sloc, eloc-sloc);
+}
+
+symbol_iter::iterator& symbol_iter::iterator::operator++()
+{
+  if (sloc < str.size()) {
+    sloc = eloc;
+    UChar32 c;
+    U16_NEXT(str.data(), eloc, str.size(), c);
+    if (c == '\\') {
+      sloc++;
+      U16_NEXT(str.data(), eloc, str.size(), c);
+    } else if (c == '<') {
+      auto i = eloc;
+      while (c != '>' && i < str.size()) U16_NEXT(str.data(), i, str.size(), c);
+      if (c == '>') eloc = i;
+    }
+    if (eloc > str.size()) eloc = str.size();
+  }
+  return *this;
+}
+
+bool symbol_iter::iterator::operator!=(const symbol_iter::iterator& o) const
+{
+  return str != o.str || sloc != o.sloc || eloc != o.eloc;
+}
+
+bool symbol_iter::iterator::operator==(const symbol_iter::iterator& o) const
+{
+  return str == o.str && sloc == o.sloc && eloc == o.eloc;
+}
+
+symbol_iter::iterator symbol_iter::begin() const
+{
+  return symbol_iter::iterator(str);
+}
+
+symbol_iter::iterator symbol_iter::end() const
+{
+  symbol_iter::iterator ret(str);
+  ret.sloc = str.size();
+  ret.eloc = str.size();
+  return ret;
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/lttoolbox/symbol_iter.h new/lttoolbox-3.7.6/lttoolbox/symbol_iter.h
--- old/lttoolbox-3.7.1/lttoolbox/symbol_iter.h 1970-01-01 01:00:00.000000000 +0100
+++ new/lttoolbox-3.7.6/lttoolbox/symbol_iter.h 2023-12-27 21:15:14.000000000 +0100
@@ -0,0 +1,33 @@
+#ifndef __LT_SYMBOL_ITER_H__
+#define __LT_SYMBOL_ITER_H__
+
+#include <ustring.h>
+
+class symbol_iter
+{
+private:
+  UStringView str;
+public:
+  symbol_iter(UStringView s) : str(s) {}
+  ~symbol_iter() {}
+  class iterator
+  {
+    friend symbol_iter;
+  private:
+    UStringView str;
+    UStringView::size_type sloc = 0;
+    UStringView::size_type eloc = 0;
+  public:
+    iterator(UStringView s);
+    iterator(const iterator& other);
+    ~iterator();
+    UStringView operator*() const;
+    iterator& operator++();
+    bool operator!=(const symbol_iter::iterator& other) const;
+    bool operator==(const symbol_iter::iterator& other) const;
+  };
+  iterator begin() const;
+  iterator end() const;
+};
+
+#endif
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/python/CMakeLists.txt new/lttoolbox-3.7.6/python/CMakeLists.txt
--- old/lttoolbox-3.7.1/python/CMakeLists.txt   2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/python/CMakeLists.txt   1970-01-01 01:00:00.000000000 +0100
@@ -1,33 +0,0 @@
-find_package(SWIG 3.0 REQUIRED)
-find_package(PythonInterp 3.5 REQUIRED)
-
-get_directory_property(_defs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMPILE_DEFINITIONS)
-string(REPLACE ";" " -D" defs "-D${_defs}")
-
-set(PYTHON_FILE "lttoolbox.py")
-set(CPP_WRAP_FILE "lttoolbox_wrap.cpp")
-set(top_srcdir ${CMAKE_SOURCE_DIR})
-set(CXXFLAGS "${CMAKE_CXX_FLAGS} ${defs}")
-set(PACKAGE ${PROJECT_NAME})
-set(PACKAGE_NAME ${PROJECT_NAME})
-set(PACKAGE_VERSION ${PROJECT_VERSION})
-
-configure_file(lttoolbox.i.in lttoolbox.i @ONLY)
-configure_file(setup.py.in setup.py @ONLY)
-
-add_custom_command(OUTPUT ${CPP_WRAP_FILE} ${PYTHON_FILE}
-       COMMAND ${PYTHON_EXECUTABLE} setup.py build
-       COMMENT "Building ${PYTHON_FILE}"
-)
-
-add_custom_target(wrapper ALL
-       DEPENDS ${CPP_WRAP_FILE} ${PYTHON_FILE}
-       VERBATIM
-)
-
-if(NOT PYTHON_INSTALL_PARAMS)
-       set(PYTHON_INSTALL_PARAMS "--prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR}/")
-endif()
-
-set(INSTALL_WRAPPER "${PYTHON_EXECUTABLE} setup.py install ${PYTHON_INSTALL_PARAMS}")
-install(CODE "execute_process(COMMAND ${INSTALL_WRAPPER} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})")
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lttoolbox-3.7.1/tests/lt_paradigm/__init__.py new/lttoolbox-3.7.6/tests/lt_paradigm/__init__.py
--- old/lttoolbox-3.7.1/tests/lt_paradigm/__init__.py   2022-11-01 09:36:47.000000000 +0100
+++ new/lttoolbox-3.7.6/tests/lt_paradigm/__init__.py   2023-12-27 21:15:14.000000000 +0100
@@ -37,3 +37,34 @@
     inputs = ['*<n><*>']
     expectedOutputs = ['ab<n><def>:abc\nab<n><ind>:ab\nn<n><ind>:n\ny<n><ind>:y']
     sortoutput = False
+
+class ExcludeSingleTest(ParadigmTest):
+    procdix = 'data/unbalanced-epsilons-mono.dix'
+    inputs = ['*<vblex><*>', '*<vblex><*-pres>', '*<vblex><*-inf-pret>']
+    expectedOutputs = [
+        're<vblex><inf>:re\nre<vblex><pres>:rer\nre<vblex><pres>:res\nre<vblex><pret>:ret',
+        're<vblex><inf>:re\nre<vblex><pret>:ret',
+        're<vblex><pres>:rer\nre<vblex><pres>:res'
+    ]
+
+class OrTagTest(ParadigmTest):
+    procdix = 'data/unbalanced-epsilons-mono.dix'
+    inputs = ['re<vblex><|pres|pret>', 're<vblex><|inf>', 're<vblex><|xqz>']
+    expectedOutputs = [
+        're<vblex><pres>:rer\nre<vblex><pres>:res\nre<vblex><pret>:ret',
+        're<vblex><inf>:re\nre<vblex><pret>:ret',
+        ''
+    ]
+
+class OrTagRepeatTest(ParadigmTest):
+    procdix = 'data/unbalanced-epsilons-mono.dix'
+    inputs = [
+        're<*|vblex|pres|pret>',
+        're<*|inf|vblex>',
+        're<*|n|adj|vblex|inf>'
+    ]
+    expectedOutputs = [
+        're<vblex><pres>:rer\nre<vblex><pres>:res\nre<vblex><pret>:ret',
+        're<vblex><inf>:re\nre<vblex><pret>:ret',
+        're<vblex><inf>:re\nre<vblex><pret>:ret',
+    ]

Reply via email to