sc/qa/unit/data/csv/testTdf82254-csv-bom.csv | 3 +++ sc/qa/unit/subsequent_filters_test.cxx | 21 +++++++++++++++++++++ sc/source/ui/dbgui/asciiopt.cxx | 8 +++++++- sc/source/ui/dbgui/imoptdlg.cxx | 7 ++++++- sc/source/ui/docshell/docsh.cxx | 13 +++++++++++++ sc/source/ui/docshell/impex.cxx | 11 +++++++---- sc/source/ui/inc/asciiopt.hxx | 3 +++ sc/source/ui/inc/imoptdlg.hxx | 3 ++- sc/source/ui/inc/impex.hxx | 4 ++++ sc/source/ui/unoobj/filtuno.cxx | 6 ++++++ 10 files changed, 72 insertions(+), 7 deletions(-)
New commits: commit 509ab788baf54285b4e38f2560326657d97510fd Author: Andreas Heinisch <andreas.heini...@yahoo.de> AuthorDate: Fri Jan 20 12:08:13 2023 +0100 Commit: Eike Rathke <er...@redhat.com> CommitDate: Sat Feb 18 20:05:49 2023 +0000 tdf#82254 - Don't remove UTF-8 BOM from CSV when saving file Don't remove the byte-order-mark in the resulting CSV file when it was present in the CSV source file. Change-Id: Id26abad2686917f320f2ace85441621bcf57ea9e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/145879 Tested-by: Jenkins Reviewed-by: Eike Rathke <er...@redhat.com> diff --git a/sc/qa/unit/data/csv/testTdf82254-csv-bom.csv b/sc/qa/unit/data/csv/testTdf82254-csv-bom.csv new file mode 100755 index 000000000000..eabb18da0bf0 --- /dev/null +++ b/sc/qa/unit/data/csv/testTdf82254-csv-bom.csv @@ -0,0 +1,3 @@ +col1 col2 col3 +row1-1 row1-2 row1-3 +row2-1 row2-2 row2-3 diff --git a/sc/qa/unit/subsequent_filters_test.cxx b/sc/qa/unit/subsequent_filters_test.cxx index 10cf129c0412..6af0460170be 100644 --- a/sc/qa/unit/subsequent_filters_test.cxx +++ b/sc/qa/unit/subsequent_filters_test.cxx @@ -54,6 +54,8 @@ #include <undomanager.hxx> #include <tabprotection.hxx> +#include <sfx2/docfile.hxx> + #include <orcusfilters.hxx> #include <filter.hxx> @@ -196,6 +198,7 @@ public: void testImportCrashes(); void testTdf129681(); void testTdf149484(); + void testTdf82254_csv_bom(); void testEscapedUnicodeXLSX(); void testTdf144758_DBDataDefaultOrientation(); void testSharedFormulaXLS(); @@ -321,6 +324,7 @@ public: CPPUNIT_TEST(testImportCrashes); CPPUNIT_TEST(testTdf129681); CPPUNIT_TEST(testTdf149484); + CPPUNIT_TEST(testTdf82254_csv_bom); CPPUNIT_TEST(testEscapedUnicodeXLSX); CPPUNIT_TEST(testTdf144758_DBDataDefaultOrientation); CPPUNIT_TEST(testSharedFormulaXLS); @@ -3203,6 +3207,23 @@ void ScFiltersTest::testTdf149484() CPPUNIT_ASSERT_EQUAL(OUString("-TRUE-"), pDoc->GetString(0, 2, 0)); } +void ScFiltersTest::testTdf82254_csv_bom() +{ + setImportFilterName(SC_TEXT_CSV_FILTER_NAME); + createScDoc("csv/testTdf82254-csv-bom.csv"); + saveAndReload(SC_TEXT_CSV_FILTER_NAME); + ScDocShell* pDocSh = getScDocShell(); + SvStream* pStream = pDocSh->GetMedium()->GetInStream(); + + pStream->Seek(0); + CPPUNIT_ASSERT_EQUAL(sal_uInt64(0), pStream->Tell()); + pStream->StartReadingUnicodeText(RTL_TEXTENCODING_UTF8); + // Without the fix in place, this test would have failed with + // - Expected: 3 + // - Actual : 0 (no byte order mark was read) + CPPUNIT_ASSERT_EQUAL(sal_uInt64(3), pStream->Tell()); +} + void ScFiltersTest::testEscapedUnicodeXLSX() { createScDoc("xlsx/escape-unicode.xlsx"); diff --git a/sc/source/ui/dbgui/asciiopt.cxx b/sc/source/ui/dbgui/asciiopt.cxx index 933491efbbe5..4234794ea279 100644 --- a/sc/source/ui/dbgui/asciiopt.cxx +++ b/sc/source/ui/dbgui/asciiopt.cxx @@ -37,6 +37,7 @@ ScAsciiOptions::ScAsciiOptions() : bSkipEmptyCells(false), bSaveAsShown(true), bSaveFormulas(false), + bIncludeBOM(false), cTextSep ( cDefaultTextSep ), eCharSet ( osl_getThreadTextEncoding() ), eLang ( LANGUAGE_SYSTEM ), @@ -192,6 +193,9 @@ void ScAsciiOptions::ReadFromString( std::u16string_view rString ) } else bEvaluateFormulas = true; // default of versions that didn't add the parameter + + // Token 13: include BOM. + bIncludeBOM = nPos >= 0 && o3tl::getToken(rString, 0, ',', nPos) == u"true"; } OUString ScAsciiOptions::WriteToString() const @@ -261,7 +265,9 @@ OUString ScAsciiOptions::WriteToString() const // Token 11: sheet to export, always 0 for current sheet ",0," + // Token 12: evaluate formulas in import - OUString::boolean( bEvaluateFormulas ) + OUString::boolean( bEvaluateFormulas ) + "," + + // Token 13: include BOM + OUString::boolean(bIncludeBOM) ); return aOutStr.makeStringAndClear(); } diff --git a/sc/source/ui/dbgui/imoptdlg.cxx b/sc/source/ui/dbgui/imoptdlg.cxx index b285c6ae968e..2777eb9e450b 100644 --- a/sc/source/ui/dbgui/imoptdlg.cxx +++ b/sc/source/ui/dbgui/imoptdlg.cxx @@ -47,6 +47,7 @@ ScImportOptions::ScImportOptions( std::u16string_view rStr ) bRemoveSpace = false; nSheetToExport = 0; bEvaluateFormulas = true; // true if not present at all, for compatibility + bIncludeBOM = false; sal_Int32 nTokenCount = comphelper::string::getTokenCount(rStr, ','); if ( nTokenCount < 3 ) return; @@ -94,6 +95,8 @@ ScImportOptions::ScImportOptions( std::u16string_view rStr ) if ( nTokenCount >= 13 ) // If present, defaults to "false". bEvaluateFormulas = o3tl::getToken(rStr, 0, ',', nIdx) == u"true"; + if (nTokenCount >= 14) + bIncludeBOM = o3tl::getToken(rStr, 0, ',', nIdx) == u"true"; } } @@ -120,7 +123,9 @@ OUString ScImportOptions::BuildString() const "," + OUString::number(nSheetToExport) + // Only available for command line --convert-to "," + - OUString::boolean( bEvaluateFormulas ) ; // same as "Evaluate formulas" in ScAsciiOptions + OUString::boolean( bEvaluateFormulas ) + // same as "Evaluate formulas" in ScAsciiOptions + "," + + OUString::boolean(bIncludeBOM) ; // same as "Include BOM" in ScAsciiOptions return aResult; } diff --git a/sc/source/ui/docshell/docsh.cxx b/sc/source/ui/docshell/docsh.cxx index f37b969bd344..4bf9343734a7 100644 --- a/sc/source/ui/docshell/docsh.cxx +++ b/sc/source/ui/docshell/docsh.cxx @@ -1301,6 +1301,15 @@ bool ScDocShell::ConvertFrom( SfxMedium& rMedium ) sc::SetFormulaDirtyContext aCxt; m_pDocument->SetAllFormulasDirty(aCxt); + // tdf#82254 - check whether to include a byte-order-mark in the output + if (const bool bIncludeBOM = aImpEx.GetIncludeBOM()) + { + aOptions.SetIncludeBOM(bIncludeBOM); + if (rMedium.GetItemSet() != nullptr) + rMedium.GetItemSet()->Put( + SfxStringItem(SID_FILE_FILTEROPTIONS, aOptions.WriteToString())); + } + // for mobile case, we use a copy of the original document and give it a temporary name before editing // Therefore, the sheet name becomes ugly, long and nonsensical. #if !(defined ANDROID) @@ -1939,6 +1948,7 @@ void ScDocShell::AsciiSave( SvStream& rStream, const ScImportOptions& rAsciiOpt, bool bSaveNumberAsSuch = rAsciiOpt.bSaveNumberAsSuch; bool bSaveAsShown = rAsciiOpt.bSaveAsShown; bool bShowFormulas = rAsciiOpt.bSaveFormulas; + bool bIncludeBOM = rAsciiOpt.bIncludeBOM; rtl_TextEncoding eOldCharSet = rStream.GetStreamCharSet(); rStream.SetStreamCharSet( eCharSet ); @@ -1955,6 +1965,9 @@ void ScDocShell::AsciiSave( SvStream& rStream, const ScImportOptions& rAsciiOpt, } else { + // tdf#82254 - check whether to include a byte-order-mark in the output + if (bIncludeBOM && eCharSet == RTL_TEXTENCODING_UTF8) + rStream.WriteUChar(0xEF).WriteUChar(0xBB).WriteUChar(0xBF); aStrDelimEncoded = OString(&cStrDelim, 1, eCharSet); aDelimEncoded = OString(&cDelim, 1, eCharSet); rtl_TextEncodingInfo aInfo; diff --git a/sc/source/ui/docshell/impex.cxx b/sc/source/ui/docshell/impex.cxx index 1b8d715890b9..87735c2087a3 100644 --- a/sc/source/ui/docshell/impex.cxx +++ b/sc/source/ui/docshell/impex.cxx @@ -114,7 +114,7 @@ ScImportExport::ScImportExport( ScDocument& r ) bFormulas( false ), bIncludeFiltered( true ), bAll( true ), bSingle( true ), bUndo( false ), bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), - mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ) + mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) { pUndoDoc = nullptr; pExtOptions = nullptr; @@ -129,7 +129,7 @@ ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt ) bFormulas( false ), bIncludeFiltered( true ), bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ), bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), - mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ) + mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) { pUndoDoc = nullptr; pExtOptions = nullptr; @@ -145,7 +145,7 @@ ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange ) bFormulas( false ), bIncludeFiltered( true ), bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ), bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), - mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ) + mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) { pUndoDoc = nullptr; pExtOptions = nullptr; @@ -162,7 +162,7 @@ ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos ) bFormulas( false ), bIncludeFiltered( true ), bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ), bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), - mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ) + mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) { pUndoDoc = nullptr; pExtOptions = nullptr; @@ -1577,6 +1577,9 @@ bool ScImportExport::ExtText2Doc( SvStream& rStrm ) std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh, ScResId( STR_LOAD_DOC ), nRemaining, true )); rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() ); + // tdf#82254 - check whether to include a byte-order-mark in the output + if (nOldPos != rStrm.Tell()) + mbIncludeBOM = true; SCCOL nStartCol = aRange.aStart.Col(); SCCOL nEndCol = aRange.aEnd.Col(); diff --git a/sc/source/ui/inc/asciiopt.hxx b/sc/source/ui/inc/asciiopt.hxx index c1b9c33d6ce3..60b9a1425ef2 100644 --- a/sc/source/ui/inc/asciiopt.hxx +++ b/sc/source/ui/inc/asciiopt.hxx @@ -37,6 +37,7 @@ private: bool bSkipEmptyCells; bool bSaveAsShown; bool bSaveFormulas; + bool bIncludeBOM; sal_Unicode cTextSep; rtl_TextEncoding eCharSet; LanguageType eLang; @@ -61,6 +62,7 @@ public: bool IsDetectSpecialNumber() const { return bDetectSpecialNumber; } bool IsEvaluateFormulas() const { return bEvaluateFormulas; } bool IsSkipEmptyCells() const { return bSkipEmptyCells; } + bool GetIncludeBOM() const { return bIncludeBOM; } sal_Unicode GetTextSep() const { return cTextSep; } bool IsFixedLen() const { return bFixedLen; } sal_uInt16 GetInfoCount() const { return mvColStart.size(); } @@ -79,6 +81,7 @@ public: void SetDetectSpecialNumber(bool bSet) { bDetectSpecialNumber = bSet; } void SetEvaluateFormulas(bool bSet) { bEvaluateFormulas = bSet; } void SetSkipEmptyCells(bool bSet) { bSkipEmptyCells = bSet; } + void SetIncludeBOM(bool bVal) { bIncludeBOM = bVal; } void SetTextSep( sal_Unicode c ) { cTextSep = c; } void SetStartRow( sal_Int32 nRow) { nStartRow= nRow; } void SetLanguage(LanguageType e) { eLang = e; } diff --git a/sc/source/ui/inc/imoptdlg.hxx b/sc/source/ui/inc/imoptdlg.hxx index 2ffe9f4df77a..91bd9e460ae0 100644 --- a/sc/source/ui/inc/imoptdlg.hxx +++ b/sc/source/ui/inc/imoptdlg.hxx @@ -32,7 +32,7 @@ public: : nFieldSepCode(nFieldSep), nTextSepCode(nTextSep), bFixedWidth(false), bSaveAsShown(false), bQuoteAllText(false), bSaveNumberAsSuch(true), bSaveFormulas(false), bRemoveSpace(false), - bEvaluateFormulas(true), nSheetToExport(0) + bEvaluateFormulas(true), bIncludeBOM(false), nSheetToExport(0) { SetTextEncoding( nEnc ); } ScImportOptions& operator=( const ScImportOptions& rCpy ) = default; @@ -52,6 +52,7 @@ public: bool bSaveFormulas; bool bRemoveSpace; bool bEvaluateFormulas; + bool bIncludeBOM; // "0" for 'current sheet', "-1" for all sheets (each to a separate file), // or 1-based specific sheet number (to a separate file). sal_Int32 nSheetToExport; diff --git a/sc/source/ui/inc/impex.hxx b/sc/source/ui/inc/impex.hxx index e11b5d510d61..2631500e98a9 100644 --- a/sc/source/ui/inc/impex.hxx +++ b/sc/source/ui/inc/impex.hxx @@ -71,6 +71,7 @@ class SC_DLLPUBLIC ScImportExport bool mbOverwriting; // Whether we could be overwriting existing values (paste). // In this case we cannot use the insert optimization, but we // do not need to broadcast after the import. + bool mbIncludeBOM; // Whether to include a byte-order-mark in the output. ScExportTextOptions mExportTextOptions; std::unique_ptr<ScAsciiOptions> pExtOptions; // extended options @@ -158,6 +159,9 @@ public: void SetImportBroadcast( bool b ) { mbImportBroadcast = b; } void SetOverwriting( const bool bOverwriting ) { mbOverwriting = bOverwriting; } void SetExportTextOptions( const ScExportTextOptions& options ) { mExportTextOptions = options; } + + bool GetIncludeBOM() const { return mbIncludeBOM; } + void SetIncludeBOM(bool bVal) { mbIncludeBOM = bVal; } }; // Helper class for importing clipboard strings as streams. diff --git a/sc/source/ui/unoobj/filtuno.cxx b/sc/source/ui/unoobj/filtuno.cxx index a2b4d0f7e1c3..3831e9634622 100644 --- a/sc/source/ui/unoobj/filtuno.cxx +++ b/sc/source/ui/unoobj/filtuno.cxx @@ -231,6 +231,7 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute() rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; OUString aTitle; + bool bIncludeBOM = false; if ( aFilterString == ScDocShell::GetAsciiFilterName() ) { @@ -245,6 +246,10 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute() aTitle = ScResId( STR_EXPORT_ASCII ); bAscii = true; + + ScAsciiOptions aOptions; + aOptions.ReadFromString(aFilterOptions); + bIncludeBOM = aOptions.GetIncludeBOM(); } else if ( aFilterString == ScDocShell::GetLotusFilterName() ) { @@ -299,6 +304,7 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute() } ScImportOptions aOptions( cAsciiDel, cStrDel, eEncoding); + aOptions.bIncludeBOM = bIncludeBOM; if(skipDialog) { // TODO: check we are not missing some of the stuff that ScImportOptionsDlg::GetImportOptions