vcl/qa/cppunit/pdfexport/data/tdf164106.fodt | 133 +++++++++++++++++++++++++++ vcl/qa/cppunit/pdfexport/pdfexport2.cxx | 50 ++++++++++ vcl/source/gdi/CommonSalLayout.cxx | 84 ++++++++++++++++- 3 files changed, 263 insertions(+), 4 deletions(-)
New commits: commit 80e07ed70c11a5c3ab7c355bb1e75278d8e6bdf3 Author: Jonathan Clark <jonat...@libreoffice.org> AuthorDate: Thu Dec 5 20:49:03 2024 -0700 Commit: Adolfo Jayme Barrientos <fit...@ubuntu.com> CommitDate: Sat Dec 7 20:38:18 2024 +0100 tdf#164106 Fix reordered glyph positioning with split grapheme clusters Due to formatting, grapheme clusters can possibly be split across multiple layouts. Layouts containing split grapheme clusters are created by laying out the complete string, and extracting only the necessary glyphs based on source codepoint index. This approach is good enough for most diacritic cases, but it cannot handle certain substitution cases where glyphs with advances would be interleaved with other layouts. Sub-layouts must be contiguous. This change introduces code to disable grapheme cluster splitting in these cases that cannot be handled correctly. Change-Id: I122abbf9c3f8a5efa4c72ad47991d0ad9ff8a8c0 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177927 Tested-by: Jenkins Reviewed-by: Jonathan Clark <jonat...@libreoffice.org> Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/178043 Reviewed-by: Adolfo Jayme Barrientos <fit...@ubuntu.com> diff --git a/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt b/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt new file mode 100644 index 000000000000..6d3866b43af0 --- /dev/null +++ b/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt @@ -0,0 +1,133 @@ +<?xml version='1.0' encoding='UTF-8'?> +<office:document xmlns:css3t="http://www.w3.org/TR/css3-text/" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" 
xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:rpt="http://openoffice.org/2005/report" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:officeooo="http://openoffice.org/2009/office" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" office:version="1.4" office:mimetype="application/vnd.oasis.opendocument.text"> + <office:meta><meta:creation-date>2024-11-30T17:13:03</meta:creation-date><meta:initial-creator>your 
servant</meta:initial-creator><dc:language>en-US</dc:language><dc:date>2024-12-05T06:45:52.650400638</dc:date><meta:editing-cycles>6</meta:editing-cycles><meta:editing-duration>PT4M37S</meta:editing-duration><meta:generator>LibreOfficeDev/25.8.0.0.alpha0$Linux_X86_64 LibreOffice_project/5f4d5a012865d717040012eb0f698a725b82d4cc</meta:generator><meta:document-statistic meta:table-count="0" meta:image-count="0" meta:object-count="0" meta:page-count="1" meta:paragraph-count="2" meta:word-count="2" meta:character-count="16" meta:non-whitespace-character-count="16"/><meta:user-defined meta:name="AppVersion">15.0000</meta:user-defined><meta:template xlink:type="simple" xlink:actuate="onRequest" xlink:title="Normal" xlink:href=""/></office:meta> + <office:font-face-decls> + <style:font-face style:name="Arial" svg:font-family="Arial" style:font-family-generic="swiss" style:font-pitch="variable"/> + <style:font-face style:name="NSimSun" svg:font-family="NSimSun" style:font-family-generic="system" style:font-pitch="variable"/> + <style:font-face style:name="Noto Sans" svg:font-family="'Noto Sans'" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Tahoma1" svg:font-family="Tahoma" style:font-family-generic="system" style:font-pitch="variable"/> + <style:font-face style:name="Times New Roman" svg:font-family="'Times New Roman'" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Times New Roman1" svg:font-family="'Times New Roman'" style:font-family-generic="system" style:font-pitch="variable"/> + </office:font-face-decls> + <office:styles> + <style:default-style style:family="graphic"> + <style:graphic-properties svg:stroke-color="#3465a4" draw:fill-color="#729fcf" fo:wrap-option="no-wrap" draw:shadow-offset-x="0.1181in" draw:shadow-offset-y="0.1181in" draw:start-line-spacing-horizontal="0.1114in" draw:start-line-spacing-vertical="0.1114in" draw:end-line-spacing-horizontal="0.1114in" 
draw:end-line-spacing-vertical="0.1114in" style:writing-mode="lr-tb" style:flow-with-text="false"/> + <style:paragraph-properties style:text-autospace="ideograph-alpha" style:line-break="strict" loext:tab-stop-distance="0in" style:font-independent-line-spacing="false"> + <style:tab-stops/> + </style:paragraph-properties> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Arial" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="NSimSun" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Tahoma1" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN"/> + </style:default-style> + <style:default-style style:family="paragraph"> + <style:paragraph-properties fo:hyphenation-ladder-count="no-limit" fo:hyphenation-keep="auto" loext:hyphenation-keep-type="column" style:text-autospace="ideograph-alpha" style:punctuation-wrap="hanging" style:line-break="strict" style:tab-stop-distance="0.4925in" style:writing-mode="lr-tb"/> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Arial" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="NSimSun" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Tahoma1" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2" loext:hyphenation-no-caps="false" loext:hyphenation-no-last-word="false" loext:hyphenation-word-char-count="5" loext:hyphenation-zone="no-limit"/> + </style:default-style> + <style:default-style style:family="table"> + <style:table-properties table:border-model="collapsing"/> + </style:default-style> + <style:default-style style:family="table-row"> + <style:table-row-properties 
fo:keep-together="auto"/> + </style:default-style> + <style:style style:name="LO-normal" style:family="paragraph"> + <style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0in" style:contextual-spacing="false" fo:text-align="start" style:justify-single-word="false" fo:orphans="2" fo:widows="2" fo:hyphenation-ladder-count="no-limit" fo:hyphenation-keep="auto" loext:hyphenation-keep-type="column" style:writing-mode="lr-tb"/> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Times New Roman" fo:font-family="'Times New Roman'" style:font-family-generic="roman" style:font-pitch="variable" fo:font-size="12pt" fo:language="fr" fo:country="CA" style:letter-kerning="false" style:font-name-asian="Times New Roman1" style:font-family-asian="'Times New Roman'" style:font-family-generic-asian="system" style:font-pitch-asian="variable" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Times New Roman1" style:font-family-complex="'Times New Roman'" style:font-family-generic-complex="system" style:font-pitch-complex="variable" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2" loext:hyphenation-no-caps="false" loext:hyphenation-no-last-word="false" loext:hyphenation-word-char-count="5" loext:hyphenation-zone="no-limit"/> + </style:style> + <text:outline-style style:name="Outline"> + <text:outline-level-style text:level="1" loext:num-list-format="%1%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="2" loext:num-list-format="%2%" style:num-format=""> + <style:list-level-properties 
text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="3" loext:num-list-format="%3%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="4" loext:num-list-format="%4%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="5" loext:num-list-format="%5%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="6" loext:num-list-format="%6%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="7" loext:num-list-format="%7%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="8" loext:num-list-format="%8%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment 
text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="9" loext:num-list-format="%9%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="10" loext:num-list-format="%10%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + </text:outline-style> + <text:notes-configuration text:note-class="footnote" style:num-format="1" text:start-value="0" text:footnotes-position="page" text:start-numbering-at="document"/> + <text:notes-configuration text:note-class="endnote" style:num-format="i" text:start-value="0"/> + <text:linenumbering-configuration text:number-lines="false" text:offset="0.1965in" style:num-format="1" text:number-position="left" text:increment="5"/> + <style:default-page-layout> + <style:page-layout-properties style:writing-mode="lr-tb" style:layout-grid-standard-mode="true"/> + </style:default-page-layout> + </office:styles> + <office:automatic-styles> + <style:style style:name="P1" style:family="paragraph" style:parent-style-name="LO-normal"> + <style:text-properties style:font-name-complex="Noto Sans"/> + </style:style> + <style:style style:name="P2" style:family="paragraph" style:parent-style-name="LO-normal"> + <style:text-properties fo:color="#0000ff" loext:opacity="100%" style:font-name-complex="Noto Sans"/> + </style:style> + <style:style style:name="T1" style:family="text"> + <style:text-properties fo:color="#00ff00" loext:opacity="100%"/> + </style:style> + <style:style style:name="T2" style:family="text"> + <style:text-properties fo:color="#0000ff" 
loext:opacity="100%"/> + </style:style> + <style:page-layout style:name="pm1"> + <style:page-layout-properties fo:page-width="8.5in" fo:page-height="11in" style:num-format="1" style:print-orientation="portrait" fo:margin-top="0.7874in" fo:margin-bottom="0.7874in" fo:margin-left="0.7874in" fo:margin-right="0.7874in" style:writing-mode="lr-tb" style:layout-grid-color="#c0c0c0" style:layout-grid-lines="136" style:layout-grid-base-height="0.0693in" style:layout-grid-ruby-height="0in" style:layout-grid-mode="none" style:layout-grid-ruby-below="false" style:layout-grid-print="false" style:layout-grid-display="false" style:layout-grid-base-width="0.1665in" style:layout-grid-snap-to="true" style:footnote-max-height="0in" loext:margin-gutter="0in"> + <style:footnote-sep style:width="0.0071in" style:distance-before-sep="0.0398in" style:distance-after-sep="0.0398in" style:line-style="solid" style:adjustment="left" style:rel-width="25%" style:color="#000000"/> + </style:page-layout-properties> + <style:header-style/> + <style:footer-style/> + </style:page-layout> + <style:style style:name="dp1" style:family="drawing-page"> + <style:drawing-page-properties draw:background-size="full"/> + </style:style> + </office:automatic-styles> + <office:master-styles> + <style:master-page style:name="Standard" style:page-layout-name="pm1" draw:style-name="dp1"/> + </office:master-styles> + <office:body> + <office:text> + <text:sequence-decls> + <text:sequence-decl text:display-outline-level="0" text:name="Illustration"/> + <text:sequence-decl text:display-outline-level="0" text:name="Table"/> + <text:sequence-decl text:display-outline-level="0" text:name="Text"/> + <text:sequence-decl text:display-outline-level="0" text:name="Drawing"/> + <text:sequence-decl text:display-outline-level="0" text:name="Figure"/> + </text:sequence-decls> + <text:p text:style-name="P1"><text:span text:style-name="T1">वीथीर्</text:span><text:span text:style-name="T2">भजनमार्गान्</text:span></text:p> + <text:p 
text:style-name="P2">वीथीर्भजनमार्गान्</text:p> + </office:text> + </office:body> +</office:document> diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx index 541bb5f8009e..e7a997b401e2 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx @@ -5719,6 +5719,56 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf162750SmallCapsLigature) CPPUNIT_ASSERT_EQUAL(u"FI"_ustr, aText.at(2).trim()); } +CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf164106SplitReorderedClusters) +{ + aMediaDescriptor[u"FilterName"_ustr] <<= u"writer_pdf_Export"_ustr; + saveAsPDF(u"tdf164106.fodt"); + + auto pPdfDocument = parsePDFExport(); + CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount()); + + auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0); + CPPUNIT_ASSERT(pPdfPage); + auto pTextPage = pPdfPage->getTextPage(); + CPPUNIT_ASSERT(pTextPage); + + int nPageObjectCount = pPdfPage->getObjectCount(); + + CPPUNIT_ASSERT_EQUAL(14, nPageObjectCount); + + std::vector<OUString> aText; + std::vector<basegfx::B2DRectangle> aRect; + + for (int i = 0; i < nPageObjectCount; ++i) + { + auto pPageObject = pPdfPage->getObject(i); + CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr); + if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text) + { + aText.push_back(pPageObject->getText(pTextPage)); + aRect.push_back(pPageObject->getBounds()); + } + } + + CPPUNIT_ASSERT_EQUAL(size_t(14), aText.size()); + + auto fnCompareIndices = [&](size_t nSplit, size_t nCombined) { + CPPUNIT_ASSERT_EQUAL(aText.at(nSplit).trim(), aText.at(nCombined).trim()); + CPPUNIT_ASSERT_DOUBLES_EQUAL(aRect.at(nSplit).getMinX(), aRect.at(nCombined).getMinX(), + /*delta*/ 0.2); + CPPUNIT_ASSERT_DOUBLES_EQUAL(aRect.at(nSplit).getMaxX(), aRect.at(nCombined).getMaxX(), + /*delta*/ 0.2); + }; + + fnCompareIndices(0, 7); + fnCompareIndices(1, 8); + fnCompareIndices(2, 9); + fnCompareIndices(3, 10); + fnCompareIndices(4, 11); + 
fnCompareIndices(5, 12); + fnCompareIndices(6, 13); +} + } // end anonymous namespace CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/vcl/source/gdi/CommonSalLayout.cxx b/vcl/source/gdi/CommonSalLayout.cxx index 115870dc9cf6..61b71f5ea7c4 100644 --- a/vcl/source/gdi/CommonSalLayout.cxx +++ b/vcl/source/gdi/CommonSalLayout.cxx @@ -150,6 +150,14 @@ public: return nClusterId; } + void Reset() + { + for (auto& rElement : m_aGlyphs) + { + rElement.second.m_bUsed = false; + } + } + void ShapeSubRun(const sal_Unicode* pStr, const int nLength, const SubRun& aSubRun, hb_font_t* pHbFont, const std::vector<hb_feature_t>& maFeatures, hb_language_t oHbLanguage) @@ -591,6 +599,73 @@ bool GenericSalLayout::LayoutText(vcl::text::ImplLayoutArgs& rArgs, const SalLay hb_glyph_info_t *pHbGlyphInfos = hb_buffer_get_glyph_infos(pHbBuffer, nullptr); hb_glyph_position_t *pHbPositions = hb_buffer_get_glyph_positions(pHbBuffer, nullptr); + // tdf#164106: Grapheme clusters can be split across multiple layouts. To do this, + // the complete string is laid out, and only the necessary glyphs are extracted. + // These sub-layouts are positioned side-by-side to form the complete text. + // This approach is good enough for most diacritic cases, but it cannot handle cases + // where a glyph with an advance is reordered into a different sub-layout. 
+ bool bStartClusterOutOfOrder = false; + bool bEndClusterOutOfOrder = false; + { + double nNormalAdvance = 0.0; + double nStartAdvance = 0.0; + double nEndAdvance = 0.0; + + auto fnHandleGlyph = [&](int i) + { + int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint; + int32_t nCluster = pHbGlyphInfos[i].cluster; + auto nOrigCharPos = stClusterMapper.RemapGlyph(nCluster, nGlyphIndex); + + double nAdvance = 0.0; + if (aSubRun.maDirection == HB_DIRECTION_TTB) + { + nAdvance = -pHbPositions[i].y_advance; + } + else + { + nAdvance = pHbPositions[i].x_advance; + } + + nNormalAdvance += nAdvance; + + if (nOrigCharPos < rArgs.mnDrawMinCharPos) + { + nStartAdvance += nAdvance; + if (nStartAdvance != nNormalAdvance) + { + bStartClusterOutOfOrder = true; + } + } + + if (nOrigCharPos < rArgs.mnDrawEndCharPos) + { + nEndAdvance += nAdvance; + if (nEndAdvance != nNormalAdvance) + { + bEndClusterOutOfOrder = true; + } + } + }; + + if (bRightToLeft) + { + for (int i = nRunGlyphCount - 1; i >= 0; --i) + { + fnHandleGlyph(i); + } + } + else + { + for (int i = 0; i < nRunGlyphCount; ++i) + { + fnHandleGlyph(i); + } + } + + stClusterMapper.Reset(); + } + for (int i = 0; i < nRunGlyphCount; ++i) { int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint; int32_t nCharPos = pHbGlyphInfos[i].cluster; @@ -731,14 +806,15 @@ bool GenericSalLayout::LayoutText(vcl::text::ImplLayoutArgs& rArgs, const SalLay const GlyphItem aGI(nCharPos, nCharCount, nGlyphIndex, aNewPos, nGlyphFlags, nAdvance, nXOffset, nYOffset, nOrigCharPos); - if (aGI.origCharPos() >= rArgs.mnDrawMinCharPos - && aGI.origCharPos() < rArgs.mnDrawEndCharPos) + auto nLowerBound = (bStartClusterOutOfOrder ? aGI.charPos() : aGI.origCharPos()); + auto nUpperBound = (bEndClusterOutOfOrder ? 
aGI.charPos() : aGI.origCharPos()); + if (nLowerBound >= rArgs.mnDrawMinCharPos && nUpperBound < rArgs.mnDrawEndCharPos) { m_GlyphItems.push_back(aGI); } - if (aGI.origCharPos() >= rArgs.mnDrawOriginCluster - && aGI.origCharPos() < rArgs.mnDrawEndCharPos) + if (nLowerBound >= rArgs.mnDrawOriginCluster + && nUpperBound < rArgs.mnDrawEndCharPos) { aCurrPos.adjustX(nAdvance); }