sw/source/filter/md/swmd.cxx |   68 +++++++++++++++++++++++++++++++++++++++++--
 sw/source/filter/md/swmd.hxx |    2 -
 2 files changed, 66 insertions(+), 4 deletions(-)

New commits:
commit 122db7e3dbd38e6512dbc34e698ddfa4f8d8a5ba
Author:     Ujjawal Kumar <[email protected]>
AuthorDate: Sun Feb 22 19:32:13 2026 +0530
Commit:     Mike Kaganski <[email protected]>
CommitDate: Sun Feb 22 16:54:47 2026 +0100

    Markdown: Handle different file encodings correctly
    
    Change-Id: I0b7d3927b105b4b77b8e750d3c368c8708d11180
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199978
    Tested-by: Jenkins CollaboraOffice <[email protected]>
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/sw/source/filter/md/swmd.cxx b/sw/source/filter/md/swmd.cxx
index 1ca7b97217a2..5c4d1fc14329 100644
--- a/sw/source/filter/md/swmd.cxx
+++ b/sw/source/filter/md/swmd.cxx
@@ -57,6 +57,9 @@
 #include <fmturl.hxx>
 #include <formatcontentcontrol.hxx>
 #include <docsh.hxx>
+#include <unicode/utypes.h>
+#include <unicode/ucsdet.h>
+#include <rtl/tencinfo.h>
 
 #include "swmd.hxx"
 
@@ -747,7 +750,6 @@ SwMarkdownParser::SwMarkdownParser(SwDoc& rD, SwPaM& rCursor, SvStream& rIn, OUS
     m_nFilesize = m_rInput.TellEnd();
     m_rInput.Seek(STREAM_SEEK_TO_BEGIN);
     m_rInput.ResetError();
-    m_pArr.reset(new char[m_nFilesize + 1]);
 }
 
 void MarkdownReader::SetupFilterOptions(SwDoc& rDoc)
@@ -839,13 +841,73 @@ ErrCodeMsg MarkdownReader::Read(SwDoc& rDoc, const OUString& rBaseURL, SwPaM& rP
 
 ErrCode SwMarkdownParser::CallParser()
 {
+    // use utf8
+    m_rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW);
+    if (m_rInput.good())
+    {
+        rtl_TextEncoding eSrcEnc;
+        const sal_uInt64 nPos = m_rInput.Tell(); //bom size
+        if (nPos == 2)
+            eSrcEnc = RTL_TEXTENCODING_UCS2;
+        else if (nPos == 3)
+            eSrcEnc = RTL_TEXTENCODING_UTF8;
+        else
+        {
+            SvStreamEndian eEndian;
+            SfxObjectShell::DetectCharSet(m_rInput, eSrcEnc, eEndian);
+            if (eSrcEnc == RTL_TEXTENCODING_DONTKNOW)
+                return ERRCODE_IO_INVALIDCHAR;
+            m_rInput.SetEndian(eEndian);
+        }
+
+        m_rInput.ResetError();
+        m_nFilesize -= nPos;
+
+        if (eSrcEnc == RTL_TEXTENCODING_UTF8)
+        {
+            m_pArr.reset(new char[m_nFilesize]);
+            m_rInput.ReadBytes(m_pArr.get(), m_nFilesize);
+        }
+        else
+        {
+            OString sUtf8Data;
+            if (eSrcEnc == RTL_TEXTENCODING_UCS2)
+            {
+                if (m_nFilesize & 1)
+                    return ERRCODE_IO_INVALIDCHAR;
+
+                const sal_uInt64 nChars = m_nFilesize / 2;
+                OUString sData = read_uInt16s_ToOUString(m_rInput, nChars);
+                sUtf8Data = OUStringToOString(sData, RTL_TEXTENCODING_UTF8);
+            }
+            else
+            {
+                OUString sData = read_uInt8s_ToOUString(m_rInput, m_nFilesize, eSrcEnc);
+                sUtf8Data = OUStringToOString(sData, RTL_TEXTENCODING_UTF8);
+            }
+
+            if (sUtf8Data.getLength())
+            {
+                m_nFilesize = sUtf8Data.getLength();
+                m_pArr.reset(new char[m_nFilesize]);
+                memcpy(m_pArr.get(), sUtf8Data.getStr(), m_nFilesize);
+            }
+            else
+            {
+                return ERRCODE_IO_INVALIDCHAR;
+            }
+        }
+    }
+    else
+    {
+        return ERRCODE_IO_INVALIDCHAR;
+    }
+
     ::StartProgress(STR_STATSTR_W4WREAD, 0, m_nFilesize, m_xDoc->GetDocShell());
 
     SwTextFormatColl* pColl
         = m_xDoc->getIDocumentStylePoolAccess().GetTextCollFromPool(RES_POOLCOLL_TEXT);
     m_xDoc->SetTextFormatColl(*m_pPam, pColl);
-    m_rInput.ReadBytes(m_pArr.get(), m_nFilesize);
-    m_pArr[m_nFilesize] = '\0';
 
     ErrCode nRet;
 
commit 8bf68dbffa1fec5f692c6641b4f505b4564c6fbb
Author:     Ujjawal Kumar <[email protected]>
AuthorDate: Sun Feb 22 19:31:03 2026 +0530
Commit:     Mike Kaganski <[email protected]>
CommitDate: Sun Feb 22 16:54:38 2026 +0100

    Change variable type from tools::Long to sal_uInt64
    
    Change-Id: I54da1879e06b7fa9a5e190127d2a05eef22db76a
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199983
    Tested-by: Jenkins CollaboraOffice <[email protected]>
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/sw/source/filter/md/swmd.hxx b/sw/source/filter/md/swmd.hxx
index 1f219357f609..afc3f9b145e2 100644
--- a/sw/source/filter/md/swmd.hxx
+++ b/sw/source/filter/md/swmd.hxx
@@ -82,7 +82,7 @@ class SwMarkdownParser
     // SfxMedium* m_pMedium;
     std::unique_ptr<char[]> m_pArr;
     std::unique_ptr<SwMdNumRuleInfo> m_pNumRuleInfo;
-    tools::Long m_nFilesize;
+    sal_uInt64 m_nFilesize;
 
     MDAttrStack m_aAttrStack;
 

Reply via email to