sw/source/filter/md/swmd.cxx | 68 +++++++++++++++++++++++++++++++++++++++++-- sw/source/filter/md/swmd.hxx | 2 - 2 files changed, 66 insertions(+), 4 deletions(-)
New commits: commit 122db7e3dbd38e6512dbc34e698ddfa4f8d8a5ba Author: Ujjawal Kumar <[email protected]> AuthorDate: Sun Feb 22 19:32:13 2026 +0530 Commit: Mike Kaganski <[email protected]> CommitDate: Sun Feb 22 16:54:47 2026 +0100 Markdown: Handle different file encodings correctly Change-Id: I0b7d3927b105b4b77b8e750d3c368c8708d11180 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199978 Tested-by: Jenkins CollaboraOffice <[email protected]> Reviewed-by: Mike Kaganski <[email protected]> diff --git a/sw/source/filter/md/swmd.cxx b/sw/source/filter/md/swmd.cxx index 1ca7b97217a2..5c4d1fc14329 100644 --- a/sw/source/filter/md/swmd.cxx +++ b/sw/source/filter/md/swmd.cxx @@ -57,6 +57,9 @@ #include <fmturl.hxx> #include <formatcontentcontrol.hxx> #include <docsh.hxx> +#include <unicode/utypes.h> +#include <unicode/ucsdet.h> +#include <rtl/tencinfo.h> #include "swmd.hxx" @@ -747,7 +750,6 @@ SwMarkdownParser::SwMarkdownParser(SwDoc& rD, SwPaM& rCursor, SvStream& rIn, OUS m_nFilesize = m_rInput.TellEnd(); m_rInput.Seek(STREAM_SEEK_TO_BEGIN); m_rInput.ResetError(); - m_pArr.reset(new char[m_nFilesize + 1]); } void MarkdownReader::SetupFilterOptions(SwDoc& rDoc) @@ -839,13 +841,73 @@ ErrCodeMsg MarkdownReader::Read(SwDoc& rDoc, const OUString& rBaseURL, SwPaM& rP ErrCode SwMarkdownParser::CallParser() { + // use utf8 + m_rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW); + if (m_rInput.good()) + { + rtl_TextEncoding eSrcEnc; + const sal_uInt64 nPos = m_rInput.Tell(); //bom size + if (nPos == 2) + eSrcEnc = RTL_TEXTENCODING_UCS2; + else if (nPos == 3) + eSrcEnc = RTL_TEXTENCODING_UTF8; + else + { + SvStreamEndian eEndian; + SfxObjectShell::DetectCharSet(m_rInput, eSrcEnc, eEndian); + if (eSrcEnc == RTL_TEXTENCODING_DONTKNOW) + return ERRCODE_IO_INVALIDCHAR; + m_rInput.SetEndian(eEndian); + } + + m_rInput.ResetError(); + m_nFilesize -= nPos; + + if (eSrcEnc == RTL_TEXTENCODING_UTF8) + { + m_pArr.reset(new char[m_nFilesize]); + m_rInput.ReadBytes(m_pArr.get(), 
m_nFilesize); + } + else + { + OString sUtf8Data; + if (eSrcEnc == RTL_TEXTENCODING_UCS2) + { + if (m_nFilesize & 1) + return ERRCODE_IO_INVALIDCHAR; + + const sal_uInt64 nChars = m_nFilesize / 2; + OUString sData = read_uInt16s_ToOUString(m_rInput, nChars); + sUtf8Data = OUStringToOString(sData, RTL_TEXTENCODING_UTF8); + } + else + { + OUString sData = read_uInt8s_ToOUString(m_rInput, m_nFilesize, eSrcEnc); + sUtf8Data = OUStringToOString(sData, RTL_TEXTENCODING_UTF8); + } + + if (sUtf8Data.getLength()) + { + m_nFilesize = sUtf8Data.getLength(); + m_pArr.reset(new char[m_nFilesize]); + memcpy(m_pArr.get(), sUtf8Data.getStr(), m_nFilesize); + } + else + { + return ERRCODE_IO_INVALIDCHAR; + } + } + } + else + { + return ERRCODE_IO_INVALIDCHAR; + } + ::StartProgress(STR_STATSTR_W4WREAD, 0, m_nFilesize, m_xDoc->GetDocShell()); SwTextFormatColl* pColl = m_xDoc->getIDocumentStylePoolAccess().GetTextCollFromPool(RES_POOLCOLL_TEXT); m_xDoc->SetTextFormatColl(*m_pPam, pColl); - m_rInput.ReadBytes(m_pArr.get(), m_nFilesize); - m_pArr[m_nFilesize] = '\0'; ErrCode nRet; commit 8bf68dbffa1fec5f692c6641b4f505b4564c6fbb Author: Ujjawal Kumar <[email protected]> AuthorDate: Sun Feb 22 19:31:03 2026 +0530 Commit: Mike Kaganski <[email protected]> CommitDate: Sun Feb 22 16:54:38 2026 +0100 Change variable type from tools::Long to sal_uInt64 Change-Id: I54da1879e06b7fa9a5e190127d2a05eef22db76a Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199983 Tested-by: Jenkins CollaboraOffice <[email protected]> Reviewed-by: Mike Kaganski <[email protected]> diff --git a/sw/source/filter/md/swmd.hxx b/sw/source/filter/md/swmd.hxx index 1f219357f609..afc3f9b145e2 100644 --- a/sw/source/filter/md/swmd.hxx +++ b/sw/source/filter/md/swmd.hxx @@ -82,7 +82,7 @@ class SwMarkdownParser // SfxMedium* m_pMedium; std::unique_ptr<char[]> m_pArr; std::unique_ptr<SwMdNumRuleInfo> m_pNumRuleInfo; - tools::Long m_nFilesize; + sal_uInt64 m_nFilesize; MDAttrStack m_aAttrStack;
