sw/qa/core/data/rtf/pass/fdo79384.rtf | 1 sw/qa/extras/rtfimport/data/fdo79384.rtf | 9 +++ sw/qa/extras/rtfimport/rtfimport.cxx | 8 ++ writerfilter/source/rtftok/rtfdocumentimpl.cxx | 72 ++++++++++++++++++------- writerfilter/source/rtftok/rtfdocumentimpl.hxx | 4 + 5 files changed, 75 insertions(+), 19 deletions(-)
New commits: commit 2c4c6daee00eb7a0b209f6a93518961a3ac5beb3 Author: Michael Stahl <mst...@redhat.com> Date: Tue Jun 3 20:32:13 2014 +0200 (related: bnc#823675) RTF import: get rid of hacks for \f in LISTLEVEL These weird hacks are apparently needed only because the \loch \hich \dbch were mapped wrongly; for the list level destination it's only important that the existing fonts are not overwritten. Change-Id: Ie2b9adf332b74c2744e9b1dbc4e878638e5ee078 (cherry picked from commit c087b60b0dd70c4a711ba1b4d556206a136fa468) diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index 2b346ba..9b6229c 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -3518,14 +3518,11 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) { RTFSprms aFontAttributes; aFontAttributes.set(nSprm, RTFValue::Pointer_t(new RTFValue(m_aFontNames[getFontIndex(nParam)]))); - // In the context of listlevels, \af seems to imply \f. - if (nKeyword == RTF_AF) - aFontAttributes.set(NS_ooxml::LN_CT_Fonts_ascii, RTFValue::Pointer_t(new RTFValue(m_aFontNames[getFontIndex(nParam)]))); RTFSprms aRunPropsSprms; aRunPropsSprms.set(NS_ooxml::LN_EG_RPrBase_rFonts, RTFValue::Pointer_t(new RTFValue(aFontAttributes))); - // If there are multiple \f or \af tokens, only handle the first one. 
- if (!m_aStates.top().aTableSprms.find(NS_ooxml::LN_CT_Lvl_rPr)) - m_aStates.top().aTableSprms.set(NS_ooxml::LN_CT_Lvl_rPr, RTFValue::Pointer_t(new RTFValue(RTFSprms(), aRunPropsSprms))); + m_aStates.top().aTableSprms.set(NS_ooxml::LN_CT_Lvl_rPr, + RTFValue::Pointer_t(new RTFValue(RTFSprms(), aRunPropsSprms)), + OVERWRITE_NO_APPEND); } else { commit eb0505ef3507876781ff7fadae2b1d1cd1348220 Author: Michael Stahl <mst...@redhat.com> Date: Tue Jun 3 20:18:59 2014 +0200 RTF import: fix handling of \loch \hich \dbch \ltrch \rtlch The logic is not immediately obvious from the RTF spec; let's do what the editengine RTF import does, but without the unnecessary complexity. Change-Id: I60e69130e6e5aed1f5d237f64b1656c3141e402a (cherry picked from commit 36246aa9fb57c9fe4e546c91a8274d8828b1424e) diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index 7c99b85..2b346ba 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -2825,8 +2825,10 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword) break; case RTF_LTRCH: // dmapper does not support this. + m_aStates.top().isRightToLeft = false; break; case RTF_RTLCH: + m_aStates.top().isRightToLeft = true; if (m_aDefaultState.nCurrentEncoding == RTL_TEXTENCODING_MS_1255) m_aStates.top().nCurrentEncoding = m_aDefaultState.nCurrentEncoding; break; @@ -3493,11 +3495,20 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) { case RTF_F: case RTF_AF: - if (nKeyword == RTF_F) - nSprm = NS_ooxml::LN_CT_Fonts_ascii; + if (m_aStates.top().isRightToLeft + || m_aStates.top().eRunType == RTFParserState::HICH) + { + nSprm = NS_ooxml::LN_CT_Fonts_cs; + } + else if (m_aStates.top().eRunType == RTFParserState::DBCH) + { + nSprm = NS_ooxml::LN_CT_Fonts_eastAsia; + } else - nSprm = (m_aStates.top().eRunType == RTFParserState::HICH - ? 
NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs); + { + assert(m_aStates.top().eRunType == RTFParserState::LOCH); + nSprm = NS_ooxml::LN_CT_Fonts_ascii; + } if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY) { m_aFontIndexes.push_back(nParam); @@ -3521,7 +3532,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) m_nCurrentFontIndex = getFontIndex(nParam); RTFValue::Pointer_t pValue(new RTFValue(getFontName(m_nCurrentFontIndex))); lcl_putNestedAttribute(m_aStates.top().aCharacterSprms, NS_ooxml::LN_EG_RPrBase_rFonts, nSprm, pValue); - m_aStates.top().nCurrentEncoding = getEncoding(m_nCurrentFontIndex); + if (nKeyword == RTF_F) + m_aStates.top().nCurrentEncoding = getEncoding(m_nCurrentFontIndex); } break; case RTF_RED: @@ -5741,6 +5753,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl* pDocumentImpl) aDrawingObject(), aFrame(this), eRunType(LOCH), + isRightToLeft(false), nYear(0), nMonth(0), nDay(0), diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx index d069baa..b86c54f 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx @@ -257,6 +257,8 @@ public: /// CJK or CTL? enum { LOCH, HICH, DBCH } eRunType; + /// ltrch or rtlch + bool isRightToLeft; // Info group. int nYear; commit 1063b8e8c122a819e844a2209a7136a8a9be31fd Author: Michael Stahl <mst...@redhat.com> Date: Tue Jun 3 19:32:10 2014 +0200 fdo#79384: replace the work-around with a different one Word will reject Shift-JIS following \loch, but apparently OOo could read and (worse) write such documents, so accept Shift-JIS regardless of run charset type. 
Change-Id: Ib181956e9f218548a52037dd76fa1d3ecdc006bd (cherry picked from commit d71387ca81b61416b9a7b82cd6cf67d496b81fc2) diff --git a/sw/qa/core/data/rtf/pass/fdo79384.rtf b/sw/qa/core/data/rtf/pass/fdo79384.rtf index 84875a9..c9d6b33 100644 --- a/sw/qa/core/data/rtf/pass/fdo79384.rtf +++ b/sw/qa/core/data/rtf/pass/fdo79384.rtf @@ -1,4 +1,5 @@ {\rtf1 +{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol;}} {\stylesheet {\*\cs35\snext35\hich\af5\dbch\af5\loch\f5 Mp{u y{p;} } diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index cf989c7..7c99b85 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -957,11 +957,8 @@ int RTFDocumentImpl::resolveChars(char ch) bool bUnicodeChecked = false; bool bSkipped = false; - // Workaround for buggy input: if we're inside a style entry, then ignore - // the fact that '{' without a matching '}' is invalid. - bool bStyleEntry = m_aStates.top().nDestinationState == DESTINATION_STYLEENTRY; - - while (!Strm().IsEof() && (m_aStates.top().nInternalState == INTERNAL_HEX || ((ch != '{' || bStyleEntry) && ch != '}' && ch != '\\'))) + while (!Strm().IsEof() && (m_aStates.top().nInternalState == INTERNAL_HEX + || (ch != '{' && ch != '}' && ch != '\\'))) { if (m_aStates.top().nInternalState == INTERNAL_HEX || (ch != 0x0d && ch != 0x0a)) { @@ -985,9 +982,12 @@ int RTFDocumentImpl::resolveChars(char ch) if (m_aStates.top().nInternalState == INTERNAL_HEX) break; - if (RTFParserState::DBCH == m_aStates.top().eRunType && - RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding) + if (RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding) { + // fdo#79384: Word will reject Shift-JIS following \loch + // but apparently OOo could read and (worse) write such documents + SAL_INFO_IF(m_aStates.top().eRunType != RTFParserState::DBCH, + "writerfilter.rtftok", "invalid Shift-JIS without DBCH"); unsigned char uch = ch; if ((uch >= 
0x80 && uch <= 0x9F) || uch >= 0xE0) { commit 8cd856d9705fbcd61ad902859769fc98bf6d7a69 Author: Michael Stahl <mst...@redhat.com> Date: Mon Jun 2 23:57:13 2014 +0200 fdo#79384: RTF import: fix literal Shift-JIS text This is a variable-length encoding, and the second byte may be a RTF syntax character like \, {, }. Change-Id: I813ccafda18388af3bf05eb7ce9a0253c627b1c4 (cherry picked from commit 061190a62fcdbfb3a0b266d5afffbd257a3e692e) diff --git a/sw/qa/extras/rtfimport/data/fdo79384.rtf b/sw/qa/extras/rtfimport/data/fdo79384.rtf new file mode 100644 index 0000000..2a90085 --- /dev/null +++ b/sw/qa/extras/rtfimport/data/fdo79384.rtf @@ -0,0 +1,9 @@ +{\rtf1\ansi +{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol{\*\falt Arial Unicode MS};}} + +\pard\plain + +\dbch\f5 Mp{u y{p +}\ + +\par } diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx b/sw/qa/extras/rtfimport/rtfimport.cxx index e96f037..49e9687 100644 --- a/sw/qa/extras/rtfimport/rtfimport.cxx +++ b/sw/qa/extras/rtfimport/rtfimport.cxx @@ -290,6 +290,14 @@ DECLARE_RTFIMPORT_TEST(testN751020, "n751020.rtf") CPPUNIT_ASSERT_EQUAL(sal_Int32(convertTwipToMm100(200)), getProperty<sal_Int32>(xParaEnum->nextElement(), "ParaBottomMargin")); } +DECLARE_RTFIMPORT_TEST(testFdo79384, "fdo79384.rtf") +{ + uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1); + + CPPUNIT_ASSERT_EQUAL(OUString("Маркеры спискамЫ", 31, RTL_TEXTENCODING_UTF8), + xTextRange->getString()); +} + DECLARE_RTFIMPORT_TEST(testFdo47326, "fdo47326.rtf") { // This was 15 only, as \super buffered text, then the contents of it got lost. 
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index 6869014..cf989c7 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -980,9 +980,33 @@ int RTFDocumentImpl::resolveChars(char ch) m_aStates.top().nCharsToSkip--; } } + // read a single char if we're in hex mode if (m_aStates.top().nInternalState == INTERNAL_HEX) break; + + if (RTFParserState::DBCH == m_aStates.top().eRunType && + RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding) + { + unsigned char uch = ch; + if ((uch >= 0x80 && uch <= 0x9F) || uch >= 0xE0) + { + // read second byte of 2-byte Shift-JIS - may be \ { } + Strm().ReadChar(ch); + if (m_aStates.top().nCharsToSkip == 0) + { + assert(bUnicodeChecked); + aBuf.append(ch); + } + else + { + assert(bSkipped); + // anybody who uses \ucN with Shift-JIS is insane + m_aStates.top().nCharsToSkip--; + } + } + } + Strm().ReadChar(ch); } if (m_aStates.top().nInternalState != INTERNAL_HEX && !Strm().IsEof()) @@ -2980,12 +3004,13 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword) break; case RTF_LOCH: // Noop, dmapper detects this automatically. + m_aStates.top().eRunType = RTFParserState::LOCH; break; case RTF_HICH: - m_aStates.top().bIsCjk = true; + m_aStates.top().eRunType = RTFParserState::HICH; break; case RTF_DBCH: - m_aStates.top().bIsCjk = false; + m_aStates.top().eRunType = RTFParserState::DBCH; break; case RTF_TITLEPG: { @@ -3471,7 +3496,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) if (nKeyword == RTF_F) nSprm = NS_ooxml::LN_CT_Fonts_ascii; else - nSprm = (m_aStates.top().bIsCjk ? NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs); + nSprm = (m_aStates.top().eRunType == RTFParserState::HICH + ? 
NS_ooxml::LN_CT_Fonts_eastAsia : NS_ooxml::LN_CT_Fonts_cs); if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY) { m_aFontIndexes.push_back(nParam); @@ -5714,7 +5740,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl* pDocumentImpl) aShape(), aDrawingObject(), aFrame(this), - bIsCjk(false), + eRunType(LOCH), nYear(0), nMonth(0), nDay(0), diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx index 3c7d2f3..d069baa 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx @@ -256,7 +256,7 @@ public: RTFFrame aFrame; /// CJK or CTL? - bool bIsCjk; + enum { LOCH, HICH, DBCH } eRunType; // Info group. int nYear;
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits