filter/source/config/cache/typedetection.cxx | 1 filter/source/config/fragments/filters/Markdown.xcu | 2 filter/source/config/fragments/types/generic_Markdown.xcu | 2 filter/source/textfilterdetect/filterdetect.cxx | 18 ++++ sw/CppunitTest_sw_uwriter.mk | 1 sw/Library_sw.mk | 2 sw/inc/shellio.hxx | 10 ++ sw/source/filter/basflt/fltini.cxx | 4 - sw/source/filter/md/swmd.cxx | 51 ++++++++++++++ sw/source/filter/md/swmd.hxx | 51 ++++++++++++++ 10 files changed, 138 insertions(+), 4 deletions(-)
New commits: commit 890d2c865215f8a679fbb8e0644e3f73c4c4bb0c Author: Ujjawal Kumar <randomfores...@gmail.com> AuthorDate: Fri Jun 13 23:54:58 2025 +0530 Commit: Thorsten Behrens <thorsten.behr...@collabora.com> CommitDate: Mon Jul 7 10:17:11 2025 +0200 Add reading Markdown files into Writer * using new dummy Markdown Filter * currently imports into empty doc Change-Id: Id11381a928636bb85ed6c4005e86a85f0d04eb9c Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186484 Reviewed-by: Thorsten Behrens <thorsten.behr...@collabora.com> Tested-by: Jenkins diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx index 4fb9ab482571..68c30a479d67 100644 --- a/filter/source/config/cache/typedetection.cxx +++ b/filter/source/config/cache/typedetection.cxx @@ -275,6 +275,7 @@ int getFlatTypeRank(std::u16string_view rType) "writer_Rich_Text_Format", "writer_web_HTML_help", "generic_HTML", + "generic_Markdown", "generic_Text", // Plain text (catch all) diff --git a/filter/source/config/fragments/filters/Markdown.xcu b/filter/source/config/fragments/filters/Markdown.xcu index 33ce9369d9ac..75b6370bf542 100644 --- a/filter/source/config/fragments/filters/Markdown.xcu +++ b/filter/source/config/fragments/filters/Markdown.xcu @@ -7,7 +7,7 @@ * --> <node oor:name="Markdown" oor:op="replace"> - <prop oor:name="Flags"><value>EXPORT ALIEN</value></prop> + <prop oor:name="Flags"><value>IMPORT EXPORT ALIEN</value></prop> <prop oor:name="UIComponent"/> <prop oor:name="FilterService"/> <prop oor:name="UserData"><value>Markdown</value></prop> diff --git a/filter/source/config/fragments/types/generic_Markdown.xcu b/filter/source/config/fragments/types/generic_Markdown.xcu index b6ad050f1066..f87de148a6a3 100644 --- a/filter/source/config/fragments/types/generic_Markdown.xcu +++ b/filter/source/config/fragments/types/generic_Markdown.xcu @@ -9,7 +9,7 @@ <node oor:name="generic_Markdown" oor:op="replace" > <prop oor:name="DetectService"><value>com.sun.star.comp.filters.PlainTextFilterDetect</value></prop> <prop oor:name="URLPattern"/> - <prop oor:name="Extensions"><value>md</value></prop> + <prop oor:name="Extensions"><value>md markdown</value></prop> <prop oor:name="MediaType"><value>text/markdown</value></prop> <prop oor:name="Preferred"><value>false</value></prop> <prop oor:name="PreferredFilter"/> diff --git a/filter/source/textfilterdetect/filterdetect.cxx b/filter/source/textfilterdetect/filterdetect.cxx index c74e11b8a3a1..7162933178a1 100644 --- a/filter/source/textfilterdetect/filterdetect.cxx +++ b/filter/source/textfilterdetect/filterdetect.cxx @@ -24,6 +24,8 @@ constexpr OUString WRITER_TEXT_FILTER = u"Text"_ustr; constexpr OUString CALC_TEXT_FILTER = u"Text - txt - csv (StarCalc)"_ustr; +constexpr OUString WRITER_MARKDOWN_FILTER = u"Markdown"_ustr; + constexpr OUStringLiteral WEB_HTML_FILTER = u"HTML"; constexpr OUStringLiteral WRITER_HTML_FILTER = u"HTML (StarWriter)"; constexpr OUStringLiteral CALC_HTML_FILTER = u"calc_HTML_WebQuery"; @@ -154,6 +156,22 @@ OUString SAL_CALL PlainTextFilterDetect::detect(uno::Sequence<beans::PropertyVal aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= OUString(WEB_HTML_FILTER); } + else if(aType == "generic_Markdown") + { + uno::Reference<io::XInputStream> xInStream(aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM], uno::UNO_QUERY); + if (!xInStream.is()) + return OUString(); + INetURLObject aParser(aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_URL, OUString() ) ); + OUString aExt = aParser.getExtension(INetURLObject::LAST_SEGMENT, true, INetURLObject::DecodeMechanism::WithCharset); + aExt = aExt.toAsciiLowerCase(); + if(aDocService == WRITER_DOCSERVICE) + aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= WRITER_MARKDOWN_FILTER; + else if(aExt == "md" || aExt == "markdown") + aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= WRITER_MARKDOWN_FILTER; + else + return OUString(); + } + else if (aType == "generic_Text") { uno::Reference<io::XStream> xStream(aMediaDesc[MediaDescriptor::PROP_STREAM], uno::UNO_QUERY); diff --git a/sw/CppunitTest_sw_uwriter.mk b/sw/CppunitTest_sw_uwriter.mk index 0fdf9355bd4a..b2e7526b722e 100644 --- a/sw/CppunitTest_sw_uwriter.mk +++ b/sw/CppunitTest_sw_uwriter.mk @@ -76,6 +76,7 @@ $(eval $(call gb_CppunitTest_use_externals,sw_uwriter,\ icu_headers \ libxml2 \ yrs \ + md4c \ )) $(eval $(call gb_CppunitTest_set_include,sw_uwriter,\ diff --git a/sw/Library_sw.mk b/sw/Library_sw.mk index 14dada067431..4eed5131e189 100644 --- a/sw/Library_sw.mk +++ b/sw/Library_sw.mk @@ -96,6 +96,7 @@ $(eval $(call gb_Library_use_externals,sw,\ icu_headers \ libxml2 \ yrs \ + md4c \ )) ifneq ($(ENABLE_WASM_STRIP_ACCESSIBILITY),TRUE) @@ -568,6 +569,7 @@ $(eval $(call gb_Library_add_exception_objects,sw,\ sw/source/filter/html/svxcss1 \ sw/source/filter/html/swhtml \ sw/source/filter/html/wrthtml \ + sw/source/filter/md/swmd \ sw/source/filter/md/wrtmd \ sw/source/filter/writer/writer \ sw/source/filter/writer/wrt_fn \ diff --git a/sw/inc/shellio.hxx b/sw/inc/shellio.hxx index f82ac9301da5..fcf80c6377bc 100644 --- a/sw/inc/shellio.hxx +++ b/sw/inc/shellio.hxx @@ -303,6 +303,14 @@ public: AsciiReader(): Reader() {} }; +class MarkdownReader final : public Reader +{ + friend class SwReader; + virtual ErrCodeMsg Read( SwDoc &, const OUString& rBaseURL, SwPaM &, const OUString &) override; +public: + MarkdownReader(): Reader() {} +}; + class SW_DLLPUBLIC StgReader : public Reader { OUString m_aFltName; @@ -372,7 +380,7 @@ public: // BEGIN source/filter/basflt/fltini.cxx -extern Reader *ReadAscii, *ReadHTML, *ReadXML; +extern Reader *ReadAscii, *ReadHTML, *ReadXML, *ReadMarkdown; SW_DLLPUBLIC Reader* SwGetReaderXML(); diff --git a/sw/source/filter/basflt/fltini.cxx b/sw/source/filter/basflt/fltini.cxx index 80189e8e707f..410e31958e30 100644 --- a/sw/source/filter/basflt/fltini.cxx +++ b/sw/source/filter/basflt/fltini.cxx @@ -43,7 +43,7 @@ using namespace utl; using namespace com::sun::star::uno; using namespace com::sun::star; -Reader *ReadAscii = nullptr, *ReadHTML = nullptr, *ReadXML = nullptr; +Reader *ReadAscii = nullptr, *ReadHTML = nullptr, *ReadXML = nullptr, *ReadMarkdown = nullptr; static Reader* GetRTFReader(); static Reader* GetWW8Reader(); @@ -99,11 +99,13 @@ Filters::Filters() ReadAscii = new AsciiReader; ReadHTML = new HTMLReader; ReadXML = new XMLReader; + ReadMarkdown = new MarkdownReader; SetFltPtr( READER_WRITER_BAS, ReadAscii ); SetFltPtr( READER_WRITER_HTML, ReadHTML ); SetFltPtr( READER_WRITER_XML, ReadXML ); SetFltPtr( READER_WRITER_TEXT_DLG, ReadAscii ); SetFltPtr( READER_WRITER_TEXT, ReadAscii ); + SetFltPtr( READER_WRITER_MD, ReadMarkdown); } Filters::~Filters() diff --git a/sw/source/filter/md/swmd.cxx b/sw/source/filter/md/swmd.cxx new file mode 100644 index 000000000000..626e1e24b60f --- /dev/null +++ b/sw/source/filter/md/swmd.cxx @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <fltini.hxx> +#include <iodetect.hxx> + +#include "swmd.hxx" + +SwMarkdownParser::SwMarkdownParser(SwDoc& rD, SwPaM& rCursor, SvStream& rIn, bool bReadNewDoc) + : m_xDoc(&rD) + , m_rInput(rIn) + // , m_pMedium(&rMedium) + , m_bNewDoc(bReadNewDoc) +{ + rCursor.DeleteMark(); + m_pPam = &rCursor; + m_rInput.ResetError(); + m_nFilesize = m_rInput.TellEnd(); + m_rInput.Seek(STREAM_SEEK_TO_BEGIN); + m_rInput.ResetError(); + m_pArr.reset(new char[m_nFilesize + 2]); +} + +ErrCodeMsg MarkdownReader::Read(SwDoc&, const OUString&, SwPaM&, const OUString&) +{ + return ERRCODE_NONE; +} + +SwMarkdownParser::~SwMarkdownParser() +{ + m_pArr.reset(); + m_xDoc.clear(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/sw/source/filter/md/swmd.hxx b/sw/source/filter/md/swmd.hxx new file mode 100644 index 000000000000..dbdd2c6e3167 --- /dev/null +++ b/sw/source/filter/md/swmd.hxx @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#pragma once + +#include <memory> + +#include <doc.hxx> +#include <pam.hxx> +#include <md4c.h> +#include <o3tl/unit_conversion.hxx> +#include <tools/stream.hxx> + +class SwMarkdownParser +{ + rtl::Reference<SwDoc> m_xDoc; + SwPaM* m_pPam; + SvStream& m_rInput; + // SfxMedium* m_pMedium; + std::unique_ptr<char[]> m_pArr; + tools::Long m_nFilesize; + + bool m_bNewDoc; + + SwMarkdownParser(const SwMarkdownParser&) = delete; + SwMarkdownParser& operator=(const SwMarkdownParser&) = delete; + +public: + SwMarkdownParser(SwDoc& rD, SwPaM& rCursor, SvStream& rIn, bool bReadNewDoc); + bool IsNewDoc() const { return m_bNewDoc; } + + ~SwMarkdownParser(); +}; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */