This is an automated email from the ASF dual-hosted git repository.

swebb2066 pushed a commit to branch fix_inputstream_reader
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git

commit c440a7aeecdb9dec0ce21bb31cef58ed6d1724e1
Author: Stephen Webb <[email protected]>
AuthorDate: Wed Apr 8 12:26:00 2026 +1000

    InputStreamReader failed to load a multibyte UTF-8 sequence on a read boundary
---
 src/main/cpp/inputstreamreader.cpp | 22 +++++---------
 src/test/cpp/filetestcase.cpp      | 60 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 16 deletions(-)

diff --git a/src/main/cpp/inputstreamreader.cpp b/src/main/cpp/inputstreamreader.cpp
index ba5ce19b..449a5f11 100644
--- a/src/main/cpp/inputstreamreader.cpp
+++ b/src/main/cpp/inputstreamreader.cpp
@@ -79,25 +79,17 @@ LogString InputStreamReader::read(Pool& p)
        while (m_priv->in->read(buf) >= 0)
        {
                buf.flip();
+               auto lastAvailableCount = buf.remaining();
                log4cxx_status_t stat = m_priv->dec->decode(buf, output);
 
-               if (stat != 0)
+               if (buf.remaining() == lastAvailableCount)
                {
-                       throw IOException(LOG4CXX_STR("decode"), stat);
-               }
-
-               if (buf.remaining() > 0)
-               {
-                       if (buf.remaining() == BUFSIZE)
-                       {
-                               throw IOException(LOG4CXX_STR("Decoder made no 
progress"));
-                       }
-                       buf.carry();
-               }
-               else
-               {
-                       buf.clear();
+                       if (stat != 0)
+                               throw IOException(LOG4CXX_STR("decode"), stat);
+                       else
+                               throw IOException(LOG4CXX_STR("decode made no 
progress"));
                }
+               buf.carry();
        }
 
        return output;
diff --git a/src/test/cpp/filetestcase.cpp b/src/test/cpp/filetestcase.cpp
index 00146c83..9910afdf 100644
--- a/src/test/cpp/filetestcase.cpp
+++ b/src/test/cpp/filetestcase.cpp
@@ -28,6 +28,8 @@
 #include <log4cxx/helpers/inputstreamreader.h>
 #include <log4cxx/helpers/fileinputstream.h>
 #include <log4cxx/helpers/loglog.h>
+#include <log4cxx/helpers/bytebuffer.h>
+#include <log4cxx/helpers/transcoder.h>
 
 #if LOG4CXX_CFSTRING_API
        #include <CoreFoundation/CFString.h>
@@ -58,6 +60,8 @@ LOGUNIT_CLASS(FileTestCase)
        LOGUNIT_TEST(copyConstructor);
        LOGUNIT_TEST(assignment);
        LOGUNIT_TEST(deleteBackslashedFileName);
+       LOGUNIT_TEST(testSplitMultibyteUtf8);
+       LOGUNIT_TEST(testInvalidUtf8);
        LOGUNIT_TEST_SUITE_END();
 
 #ifdef _DEBUG
@@ -102,6 +106,8 @@ public:
                }
                catch (IOException& ex)
                {
+                       LOG4CXX_DECODE_CHAR(msg, ex.what());
+                       LogLog::debug(msg);
                }
        }
 
@@ -206,7 +212,59 @@ public:
                Pool pool;
                /*bool deleted = */file.deleteFile(pool);
        }
-};
 
+       class MockInputStream : public InputStream
+       {
+               ByteBuffer m_data;
+       public:
+               MockInputStream(const char* data, size_t charCount)
+                       : m_data(const_cast<char*>(data), charCount)
+               {}
+
+               int read(ByteBuffer& dst) override
+               {
+                       auto availableBytes = m_data.remaining();
+                       if (availableBytes < 1)
+                               return -1;
+                       int count = 0;
+                       for (auto p = m_data.current(); count < availableBytes 
&& dst.put(*p); ++p)
+                               ++count;
+                       m_data.increment_position(count);
+                       return count;
+               }
+
+               void close() override {}
+       };
+
+       /**
+        * Tests behavior when a multibyte UTF-8 sequence occurs on a read boundary
+        */
+       void testSplitMultibyteUtf8()
+       {
+               Pool p;
+               // InputStreamReader uses a buffer of size 4096
+               std::string input( 4094, 'A' );
+               // räksmörgås.josefsson.org
+               
input.append("\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147");
+               InputStreamReader 
reader(std::make_shared<MockInputStream>(input.c_str(), input.size()), 
CharsetDecoder::getUTF8Decoder());
+               auto contentLS = reader.read(p);
+               LOG4CXX_ENCODE_CHAR(content, contentLS);
+               LOGUNIT_ASSERT_EQUAL(input, content);
+       }
+
+       /**
+        * Tests behavior given an incomplete multibyte UTF-8 sequence in the input
+        */
+       void testInvalidUtf8()
+       {
+               Pool p;
+               // 0xC2 is a generic start byte for a 2-byte sequence in UTF-8.
+               char input[] = { 'A', (char)0xC2, 'B', 'C', 0 };
+               InputStreamReader 
reader(std::make_shared<MockInputStream>(input, 4), 
CharsetDecoder::getUTF8Decoder());
+               auto contentLS = reader.read(p);
+               LOG4CXX_ENCODE_CHAR(content, contentLS);
+               LOGUNIT_ASSERT_EQUAL("A", content);
+       }
+};
 
 LOGUNIT_TEST_SUITE_REGISTRATION(FileTestCase);

Reply via email to