Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package kf6-kcodecs for openSUSE:Factory 
checked in at 2025-12-16 15:50:28
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/kf6-kcodecs (Old)
 and      /work/SRC/openSUSE:Factory/.kf6-kcodecs.new.1939 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "kf6-kcodecs"

Tue Dec 16 15:50:28 2025 rev:22 rq:1322697 version:6.21.0

Changes:
--------
--- /work/SRC/openSUSE:Factory/kf6-kcodecs/kf6-kcodecs.changes  2025-11-17 
12:14:44.375912457 +0100
+++ /work/SRC/openSUSE:Factory/.kf6-kcodecs.new.1939/kf6-kcodecs.changes        
2025-12-16 15:55:12.092188263 +0100
@@ -1,0 +2,15 @@
+Fri Dec 12 20:17:20 UTC 2025 - Christophe Marin <[email protected]>
+
+- Update to 6.21.0
+  * New feature release
+  * For more details please see:
+  * https://kde.org/announcements/frameworks/6/6.21.0
+- Changes since 6.20.0:
+  * Update dependency version to 6.21.0
+  * [KEncodingProber] Some more tests for UTF-8
+  * [KEncodingProber] Make UTF-8 state machine RFC3629 compliant
+  * [KEncodingProber] Add unit tests for UTF-8/UTF-16
+  * [KEncodingProber] Remove DEBUG_PROBE from public header file
+  * Update version to 6.21.0
+
+-------------------------------------------------------------------

Old:
----
  kcodecs-6.20.0.tar.xz
  kcodecs-6.20.0.tar.xz.sig

New:
----
  kcodecs-6.21.0.tar.xz
  kcodecs-6.21.0.tar.xz.sig

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ kf6-kcodecs.spec ++++++
--- /var/tmp/diff_new_pack.8W9VVT/_old  2025-12-16 15:55:13.364242129 +0100
+++ /var/tmp/diff_new_pack.8W9VVT/_new  2025-12-16 15:55:13.364242129 +0100
@@ -19,11 +19,11 @@
 %define qt6_version 6.8.0
 
 %define rname   kcodecs
-# Full KF6 version (e.g. 6.20.0)
+# Full KF6 version (e.g. 6.21.0)
 %{!?_kf6_version: %global _kf6_version %{version}}
 %bcond_without released
 Name:           kf6-kcodecs
-Version:        6.20.0
+Version:        6.21.0
 Release:        0
 Summary:        Method collection to manipulate strings using various encodings
 License:        LGPL-2.1-or-later


++++++ kcodecs-6.20.0.tar.xz -> kcodecs-6.21.0.tar.xz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/CMakeLists.txt 
new/kcodecs-6.21.0/CMakeLists.txt
--- old/kcodecs-6.20.0/CMakeLists.txt   2025-11-07 19:58:55.000000000 +0100
+++ new/kcodecs-6.21.0/CMakeLists.txt   2025-12-05 14:19:12.000000000 +0100
@@ -1,10 +1,10 @@
 cmake_minimum_required(VERSION 3.16)
 
-set(KF_VERSION "6.20.0") # handled by release scripts
+set(KF_VERSION "6.21.0") # handled by release scripts
 project(KCodecs VERSION ${KF_VERSION})
 
 include(FeatureSummary)
-find_package(ECM 6.20.0  NO_MODULE)
+find_package(ECM 6.21.0  NO_MODULE)
 set_package_properties(ECM PROPERTIES TYPE REQUIRED DESCRIPTION "Extra CMake 
Modules." URL "https://commits.kde.org/extra-cmake-modules")
 feature_summary(WHAT REQUIRED_PACKAGES_NOT_FOUND 
FATAL_ON_MISSING_REQUIRED_PACKAGES)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/autotests/CMakeLists.txt 
new/kcodecs-6.21.0/autotests/CMakeLists.txt
--- old/kcodecs-6.20.0/autotests/CMakeLists.txt 2025-11-07 19:58:55.000000000 
+0100
+++ new/kcodecs-6.21.0/autotests/CMakeLists.txt 2025-12-05 14:19:12.000000000 
+0100
@@ -21,6 +21,14 @@
     LINK_LIBRARIES KF6::Codecs Qt6::Test
 )
 
+ecm_add_test(
+    TEST_NAME kencodingproberunittest
+    kencodingproberunittest.cpp
+    ../src/probers/nsMBCSSM.cpp
+    LINK_LIBRARIES Qt6::Test
+)
+target_include_directories(kencodingproberunittest PRIVATE 
${CMAKE_BINARY_DIR}/src)
+
 # Benchmark, compiled, but not run automatically with ctest
 add_executable(base64benchmark base64benchmark.cpp)
 target_link_libraries(base64benchmark KF6::Codecs Qt6::Test)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/autotests/kencodingproberunittest.cpp 
new/kcodecs-6.21.0/autotests/kencodingproberunittest.cpp
--- old/kcodecs-6.20.0/autotests/kencodingproberunittest.cpp    1970-01-01 
01:00:00.000000000 +0100
+++ new/kcodecs-6.21.0/autotests/kencodingproberunittest.cpp    2025-12-05 
14:19:12.000000000 +0100
@@ -0,0 +1,226 @@
+/*
+    SPDX-FileCopyrightText: 2025 Stefan Brüns <[email protected]>
+
+    SPDX-License-Identifier: GPL-2.0-or-later
+*/
+
+#include <QTest>
+
+#include "../src/probers/nsCodingStateMachine.h"
+
+class KEncodingProberUnitTest : public QObject
+{
+    Q_OBJECT
+
+private Q_SLOTS:
+    void testUtf8();
+    void testUtf8_data();
+    void testUtf16BE();
+    void testUtf16BE_data();
+    void testUtf16LE();
+    void testUtf16LE_data();
+    void testUtf16_common_data();
+};
+
+void KEncodingProberUnitTest::testUtf8()
+{
+    QFETCH(QByteArray, data);
+    QFETCH(bool, utf8Valid);
+
+    using namespace kencodingprober;
+
+    nsCodingStateMachine stateMachine{&UTF8SMModel};
+    nsSMState state = eStart;
+
+    for (auto b : data) {
+        state = stateMachine.NextState(b);
+    }
+
+    if (utf8Valid) {
+        QVERIFY(state != eError);
+    } else {
+        QVERIFY(state == eError);
+    }
+}
+
+void KEncodingProberUnitTest::testUtf8_data()
+{
+    using namespace Qt::StringLiterals;
+
+    QTest::addColumn<QByteArray>("data");
+    QTest::addColumn<bool>("utf8Valid");
+
+    QTest::addRow("UTF-8 Latin1") << "abcdxyzABCDXYZ 0129;,"_ba << true;
+    QTest::addRow("BOM UTF-8") << "\xef\xbb\xbfZ"_ba << true; // "<UTF-8 BOM>Z"
+
+    // multibyte sequences - length 2
+    QTest::addRow("UTF-8 Latin1 Supplement") //
+        << "Latin1 Text \xC3\xA4\xC3\xB6\xC3\xBC\xC3\x9F"_ba // "Latin1 Text 
äöüß"
+        << true;
+    QTest::addRow("UTF-8 len 2") << "Text \xC3\xA4  "_ba << true;
+    QTest::addRow("UTF-8 len 2 short") << "Text \xC3  "_ba << false;
+    QTest::addRow("UTF-8 len 2 invalid range") << "Text \xC0\x90   "_ba << 
false;
+
+    // multibyte sequences - length 3
+    QTest::addRow("UTF-8 CJK") //
+        << QByteArray::fromHex("e998bfe5b094e58d91e696afe5b1b1e88489") // 
阿尔卑斯山脉
+        << true;
+    QTest::addRow("UTF-8 len 3 a") << "Text \xE2\x80\x90 "_ba << true; // "‐" 
(HYPHEN)
+    QTest::addRow("UTF-8 len 3-1 short") << "Text \xE2\x80 "_ba << false;
+    QTest::addRow("UTF-8 len 3-2 short") << "Text \xE2 "_ba << false;
+
+    QTest::addRow("UTF-8 len 3 b") << "Text \xE0\xbf\xbf "_ba << true; // U+0FFF 
(highest code point encoded with lead byte 0xE0)
+    QTest::addRow("UTF-8 len 3 invalid range") << "Text \xE0\x9f\x90 "_ba << 
false;
+
+    QTest::addRow("UTF-8 len 3 c") << "Text \xED\x80\x80 "_ba << true; // "퀀" 
(HANGUL SYLLABLE KWEON)
+    QTest::addRow("UTF-8 invalid CESU") << "Text \xED\xbf\x80 "_ba << false;
+
+    // multibyte sequences - length 4
+    QTest::addRow("UTF-8 SMP Symbols") << "\xF0\x9F\x82\xA1 "_ba << true; // 
"🂡 " (ACE OF SPADES)
+    QTest::addRow("UTF-8 len 4-1 short") << "\xF0\x9F\x82  "_ba << false;
+    QTest::addRow("UTF-8 len 4-2 short") << "\xF0\x9F   "_ba << false;
+    QTest::addRow("UTF-8 len 4-3 short") << "\xF0    "_ba << false;
+    QTest::addRow("UTF-8 len 4 invalid long") << "\xF0\x8F\x90\x90 "_ba << 
false;
+    QTest::addRow("UTF-8 len 4 invalid range") << "\xF5\x90\x90\x90 "_ba << 
false;
+
+    // multibyte sequences - length 5/6 (invalid)
+    QTest::addRow("UTF-8 len 5 invalid") << "\xF8\x90\x90\x90\x90 "_ba << 
false;
+    QTest::addRow("UTF-8 len 6 invalid") << "\xFC\x90\x90\x90\x90\x90 "_ba << 
false;
+
+    QTest::addRow("UTF-8 0xFE invalid") << "\xFE "_ba << false;
+    QTest::addRow("UTF-8 0xFF invalid") << "\xFF "_ba << false;
+
+    // continuation without leading 2/3/4 byte start byte
+    QTest::addRow("UTF-8 invalid isolate high 0x80") << "\x80 "_ba << false;
+    QTest::addRow("UTF-8 invalid isolate high 0x92") << "\x92 "_ba << false;
+    QTest::addRow("UTF-8 invalid isolate high 0xAA") << "\xAA "_ba << false;
+    QTest::addRow("UTF-8 invalid isolate high 0xBF") << "\xBF "_ba << false;
+
+    // Either Windows-1252/-1254/-1255 (binary identical)
+    // "One pound, i.e. ½ a kilogramm of butter costs 2 £."
+    QTest::addRow("Windows-125x English") << "One pound, i.e. \xAF a kilogramm 
of butter costs 2 \xA3."_ba << false;
+    // Example texts with Windows-125x encoding which are definitely not UTF-8 
-- see Wikipedia "Pangram"
+    // "Příliš žluťoučký kůň úpěl ďábelské ódy" - "A horse that was too yellow 
moaned devilish odes"
+    QTest::addRow("Windows-1250 Czech") << //
+        "P\xf8\xedli\x9a \x9elu\x9dou\xe8k\xfd k\xf9\xf2 \xfap\xecl 
\xef\xe1\x62\x65lsk\xe9 \xf3\x64y."_ba << false;
+    // "Под южно дърво, цъфтящо в синьо, бягаше малко пухкаво зайче" - "Under 
a southern tree, blooming in blue, ran a little fluffy bunny"
+    QTest::addRow("Windows-1251 Bulgarian") << QByteArray::fromHex( //
+        "cfeee420fee6edee20e4faf0e2ee2c20f6faf4f2fff9ee20"
+        "e220f1e8edfcee2c20e1ffe3e0f8e520ece0ebeaee20eff3"
+        "f5eae0e2ee20e7e0e9f7e5") << false;
+    // "Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich." - 
"Victor chases twelve boxers across the Great Levee of Sylt"
+    QTest::addRow("Windows-1252 German") << //
+        "Victor jagt zw\xf6lf Boxk\xe4mpfer quer \xfc\x62\x65r den 
gro\xdf\x65n Sylter Deich."_ba << false;
+    // "שפן אכל קצת גזר בטעם חסה, ודי" - "A bunny ate some lettuce-flavored 
carrots, and he had enough"
+    QTest::addRow("Windows-1255 Hebrew") //
+        << 
QByteArray::fromHex("f9f4ef20e0ebec20f7f6fa20e2e6f820e1e8f2ed20e7f1e42c20e5e3e9")
 << false;
+}
+
+void KEncodingProberUnitTest::testUtf16BE()
+{
+    QFETCH(QByteArray, data);
+    QFETCH(bool, utf16BEValid);
+
+    using namespace kencodingprober;
+
+    nsCodingStateMachine stateMachine{&UCS2BESMModel};
+    nsSMState state = eStart;
+
+    QEXPECT_FAIL("UTF16 Interpunctuation little", "valid codepoint rejected", 
Abort);
+    QEXPECT_FAIL("UTF16 Math supplement big", "valid codepoint rejected", 
Abort);
+    QEXPECT_FAIL("UTF16 ZWNBSP little", "zero width no-break space rejected", 
Abort);
+    for (auto b : data) {
+        state = stateMachine.NextState(b);
+    }
+
+    if (utf16BEValid) {
+        QVERIFY(state != eError);
+    } else {
+        QVERIFY(state == eError);
+    }
+}
+
+void KEncodingProberUnitTest::testUtf16LE()
+{
+    QFETCH(QByteArray, data);
+    QFETCH(bool, utf16LEValid);
+
+    using namespace kencodingprober;
+
+    nsCodingStateMachine stateMachine{&UCS2LESMModel};
+    nsSMState state = eStart;
+
+    QEXPECT_FAIL("UTF16 Interpunctuation big", "valid codepoint rejected", 
Abort);
+    QEXPECT_FAIL("UTF16 Math supplement little", "valid codepoint rejected", 
Abort);
+    QEXPECT_FAIL("UTF16 ZWNBSP big", "zero width no-break space rejected", 
Abort);
+    for (auto b : data) {
+        state = stateMachine.NextState(b);
+    }
+
+    if (utf16LEValid) {
+        QVERIFY(state != eError);
+    } else {
+        QVERIFY(state == eError);
+    }
+}
+
+void KEncodingProberUnitTest::testUtf16_common_data()
+{
+    QTest::addColumn<QByteArray>("data");
+    QTest::addColumn<bool>("utf16BEValid");
+    QTest::addColumn<bool>("utf16LEValid");
+
+    QTest::addRow("empty") << QByteArray() << true << true;
+    // BOM must be detected
+    QTest::addRow("BE BOM") << QByteArray("\xFE\xFF") << true << false;
+    QTest::addRow("LE BOM") << QByteArray("\xFF\xFE") << false << true;
+    // swapped endianness does not cause an error, as the codepoint is still 
valid
+    QTest::addRow("BE HS+LS") << QByteArray("\xDC\x00\xD8\x00") << true << 
true;
+    QTest::addRow("LE HS+LS") << QByteArray("\x00\xDC\x00\xD8") << true << 
true;
+
+    struct Utf16TestData {
+        const char *name;
+        const std::span<const char16_t> data;
+        bool validBig;
+        bool validLittle;
+    };
+    using namespace std::string_view_literals;
+    constexpr std::array<Utf16TestData, 7> utf16TestData = {
+        // syntactically correct even with wrong endianness
+        Utf16TestData{"UTF16 XY", u"XY"sv, true, true},
+        Utf16TestData{"UTF16 ab", u"ab"sv, true, true},
+        Utf16TestData{"UTF16 äöü", u"äöü"sv, true, true},
+        Utf16TestData{"UTF16 BOM", u"\xFEFF"sv, true, false},
+        // "‛" or "ᬠ" (U+1B20 BALINESE LETTER DA MURDA MAHAPRANA)
+        Utf16TestData{"UTF16 Interpunctuation", u"\x201B"sv, true, true},
+        // "⨯" or "⼪" (U+2F2A KANGXI RADICAL LAME)
+        Utf16TestData{"UTF16 Math supplement", u"\x2A2F"sv, true, true},
+        // ZWNBSP aka BOM inside the document is deprecated, but valid
+        Utf16TestData{"UTF16 ZWNBSP", u" \xFEFF"sv, true, true},
+    };
+
+    for (const auto &tc : utf16TestData) {
+        QByteArray data;
+        data.resize(tc.data.size() * 2);
+
+        qToBigEndian<quint16>(tc.data.data(), tc.data.size(), data.data());
+        QTest::addRow("%s big", tc.name) << data << tc.validBig << 
tc.validLittle;
+
+        qToLittleEndian<quint16>(tc.data.data(), tc.data.size(), data.data());
+        QTest::addRow("%s little", tc.name) << data << tc.validLittle << 
tc.validBig;
+    }
+}
+
+void KEncodingProberUnitTest::testUtf16BE_data()
+{
+    testUtf16_common_data();
+}
+
+void KEncodingProberUnitTest::testUtf16LE_data()
+{
+    testUtf16_common_data();
+}
+
+QTEST_MAIN(KEncodingProberUnitTest)
+
+#include "kencodingproberunittest.moc"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/src/CMakeLists.txt 
new/kcodecs-6.21.0/src/CMakeLists.txt
--- old/kcodecs-6.20.0/src/CMakeLists.txt       2025-11-07 19:58:55.000000000 
+0100
+++ new/kcodecs-6.21.0/src/CMakeLists.txt       2025-12-05 14:19:12.000000000 
+0100
@@ -26,6 +26,7 @@
     kemailaddress.h
     kencodingprober.cpp
     kencodingprober.h
+    kencodingprober_p.h
     probers/CharDistribution.cpp
     probers/CharDistribution.h
     probers/ChineseGroupProber.cpp
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/src/kencodingprober.cpp 
new/kcodecs-6.21.0/src/kencodingprober.cpp
--- old/kcodecs-6.20.0/src/kencodingprober.cpp  2025-11-07 19:58:55.000000000 
+0100
+++ new/kcodecs-6.21.0/src/kencodingprober.cpp  2025-12-05 14:19:12.000000000 
+0100
@@ -7,6 +7,7 @@
 */
 
 #include "kencodingprober.h"
+#include "kencodingprober_p.h"
 
 #include "probers/ChineseGroupProber.h"
 #include "probers/JapaneseGroupProber.h"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/src/kencodingprober.h 
new/kcodecs-6.21.0/src/kencodingprober.h
--- old/kcodecs-6.20.0/src/kencodingprober.h    2025-11-07 19:58:55.000000000 
+0100
+++ new/kcodecs-6.21.0/src/kencodingprober.h    2025-12-05 14:19:12.000000000 
+0100
@@ -8,15 +8,8 @@
 #ifndef KENCODINGPROBER_H
 #define KENCODINGPROBER_H
 
-// enable debug of private probers
-// #define DEBUG_PROBE
-
 #include <kcodecs_export.h>
 
-#ifdef DEBUG_PROBE
-#include <QDebug>
-#endif
-
 #include <QCoreApplication>
 #include <QString>
 #include <memory>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/src/kencodingprober_p.h 
new/kcodecs-6.21.0/src/kencodingprober_p.h
--- old/kcodecs-6.20.0/src/kencodingprober_p.h  1970-01-01 01:00:00.000000000 
+0100
+++ new/kcodecs-6.21.0/src/kencodingprober_p.h  2025-12-05 14:19:12.000000000 
+0100
@@ -0,0 +1,18 @@
+/*
+    This file is part of the KDE libraries
+
+    SPDX-FileCopyrightText: 2008 Wang Hoi <[email protected]>
+
+    SPDX-License-Identifier: LGPL-2.0-or-later
+*/
+#ifndef KENCODINGPROBER_P_H
+#define KENCODINGPROBER_P_H
+
+// enable debug of private probers
+// #define DEBUG_PROBE
+
+#ifdef DEBUG_PROBE
+#include <QDebug>
+#endif
+
+#endif
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/src/probers/nsCharSetProber.h 
new/kcodecs-6.21.0/src/probers/nsCharSetProber.h
--- old/kcodecs-6.20.0/src/probers/nsCharSetProber.h    2025-11-07 
19:58:55.000000000 +0100
+++ new/kcodecs-6.21.0/src/probers/nsCharSetProber.h    2025-12-05 
14:19:12.000000000 +0100
@@ -7,7 +7,9 @@
 #ifndef nsCharSetProber_h__
 #define nsCharSetProber_h__
 
-#include "kencodingprober.h"
+#include <kcodecs_export.h>
+
+#include "../kencodingprober_p.h"
 
 namespace kencodingprober
 {
@@ -32,7 +34,7 @@
     virtual float GetConfidence(void) = 0;
 
 #ifdef DEBUG_PROBE
-    void DumpStatus() override
+    virtual void DumpStatus()
     {
     }
 #endif
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/src/probers/nsCodingStateMachine.h 
new/kcodecs-6.21.0/src/probers/nsCodingStateMachine.h
--- old/kcodecs-6.20.0/src/probers/nsCodingStateMachine.h       2025-11-07 
19:58:55.000000000 +0100
+++ new/kcodecs-6.21.0/src/probers/nsCodingStateMachine.h       2025-12-05 
14:19:12.000000000 +0100
@@ -7,7 +7,7 @@
 #ifndef nsCodingStateMachine_h__
 #define nsCodingStateMachine_h__
 
-#include "kencodingprober.h"
+#include "../kencodingprober_p.h"
 
 #include "kcodecs_export.h"
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/kcodecs-6.20.0/src/probers/nsMBCSSM.cpp 
new/kcodecs-6.21.0/src/probers/nsMBCSSM.cpp
--- old/kcodecs-6.20.0/src/probers/nsMBCSSM.cpp 2025-11-07 19:58:55.000000000 
+0100
+++ new/kcodecs-6.21.0/src/probers/nsMBCSSM.cpp 2025-12-05 14:19:12.000000000 
+0100
@@ -471,58 +471,56 @@
     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
-    PCK4BITS(2, 2, 2, 2, 3, 3, 3, 3), // 80 - 87
-    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 88 - 8f
-    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 90 - 97
-    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 98 - 9f
-    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // a0 - a7
-    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // a8 - af
-    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // b0 - b7
-    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // b8 - bf
-    PCK4BITS(0, 0, 6, 6, 6, 6, 6, 6), // c0 - c7
-    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // c8 - cf
-    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // d0 - d7
-    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // d8 - df
-    PCK4BITS(7, 8, 8, 8, 8, 8, 8, 8), // e0 - e7
-    PCK4BITS(8, 8, 8, 8, 8, 9, 8, 8), // e8 - ef
-    PCK4BITS(10, 11, 11, 11, 11, 11, 11, 11), // f0 - f7
-    PCK4BITS(12, 13, 13, 13, 14, 15, 0, 0) // f8 - ff
+    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
+    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
+    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
+    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
+    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a0 - a7
+    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a8 - af
+    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b0 - b7
+    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b8 - bf
+    PCK4BITS(0, 0, 5, 5, 5, 5, 5, 5), // c0 - c7
+    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // c8 - cf
+    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d0 - d7
+    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d8 - df
+    PCK4BITS(6, 7, 7, 7, 7, 7, 7, 7), // e0 - e7
+    PCK4BITS(7, 7, 7, 7, 7, 8, 7, 7), // e8 - ef
+    PCK4BITS(9, 10, 10, 10, 11, 0, 0, 0), // f0 - f7
+    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0) // f8 - ff
 };
 
-static const unsigned int UTF8_st[26] = {
-    PCK4BITS(eError, eStart, eError, eError, eError, eError, 12, 10), // 00-07
-    PCK4BITS(9, 11, 8, 7, 6, 5, 4, 3), // 08-0f
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 10-17
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 18-1f
-    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), 
// 20-27
-    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), 
// 28-2f
-    PCK4BITS(eError, eError, 5, 5, 5, 5, eError, eError), // 30-37
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 38-3f
-    PCK4BITS(eError, eError, eError, 5, 5, 5, eError, eError), // 40-47
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 48-4f
-    PCK4BITS(eError, eError, 7, 7, 7, 7, eError, eError), // 50-57
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 58-5f
-    PCK4BITS(eError, eError, eError, eError, 7, 7, eError, eError), // 60-67
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 68-6f
-    PCK4BITS(eError, eError, 9, 9, 9, 9, eError, eError), // 70-77
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 78-7f
-    PCK4BITS(eError, eError, eError, eError, eError, 9, eError, eError), // 
80-87
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 88-8f
-    PCK4BITS(eError, eError, 12, 12, 12, 12, eError, eError), // 90-97
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 98-9f
-    PCK4BITS(eError, eError, eError, eError, eError, 12, eError, eError), // 
a0-a7
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// a8-af
-    PCK4BITS(eError, eError, 12, 12, 12, eError, eError, eError), // b0-b7
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// b8-bf
-    PCK4BITS(eError, eError, eStart, eStart, eStart, eStart, eError, eError), 
// c0-c7
-    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError) 
// c8-cf
+static const unsigned int UTF8_st[10 * 12 / 8] = {
+    // clang-format off
+    // byteclass: 0       1       2       3       4       5       6       7   
// State
+    //            8       9      10      11 |     0       1       2       3
+    //            4       5       6       7       8       9      10      11
+    PCK4BITS(eError, eStart, eError, eError, eError,      3,      4,      5), 
// eStart
+    PCK4BITS(     6,      7,      8,      9, eError, eError, eError, eError), 
// eStart | eError
+    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// eError
+
+    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), 
// eItsMe
+    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, eStart), 
// eItsMe | 3
+    PCK4BITS(eStart, eError, eError, eError, eError, eError, eError, eError), 
// 3
+
+    PCK4BITS(eError, eError, eError, eError,      3, eError, eError, eError), 
// 4
+    PCK4BITS(eError, eError, eError, eError, eError, eError,      3,      3), 
// 4 | 5
+    PCK4BITS(     3, eError, eError, eError, eError, eError, eError, eError), 
// 5
+
+    PCK4BITS(eError, eError,      3,      3, eError, eError, eError, eError), 
// 6
+    PCK4BITS(eError, eError, eError, eError, eError, eError, eError,      5), 
// 6 | 7
+    PCK4BITS(     5, eError, eError, eError, eError, eError, eError, eError), 
// 7
+
+    PCK4BITS(eError, eError,      5,      5,      5, eError, eError, eError), 
// 8
+    PCK4BITS(eError, eError, eError, eError, eError, eError,      5, eError), 
// 8 | 9
+    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), 
// 9
+    // clang-format on
 };
 
-static const unsigned int UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 
4, 4, 5, 5, 6, 6};
+static const unsigned int UTF8CharLenTable[] = {0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 
4, 4};
 
 const SMModel UTF8SMModel = {
     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls},
-    16,
+    12,
     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st},
     UTF8CharLenTable,
     "UTF-8",

Reply via email to