sc/qa/unit/data/csv/testTdf82254-csv-bom.csv |    3 +++
 sc/qa/unit/subsequent_filters_test.cxx       |   21 +++++++++++++++++++++
 sc/source/ui/dbgui/asciiopt.cxx              |    8 +++++++-
 sc/source/ui/dbgui/imoptdlg.cxx              |    7 ++++++-
 sc/source/ui/docshell/docsh.cxx              |   13 +++++++++++++
 sc/source/ui/docshell/impex.cxx              |   11 +++++++----
 sc/source/ui/inc/asciiopt.hxx                |    3 +++
 sc/source/ui/inc/imoptdlg.hxx                |    3 ++-
 sc/source/ui/inc/impex.hxx                   |    4 ++++
 sc/source/ui/unoobj/filtuno.cxx              |    6 ++++++
 10 files changed, 72 insertions(+), 7 deletions(-)

New commits:
commit 509ab788baf54285b4e38f2560326657d97510fd
Author:     Andreas Heinisch <andreas.heini...@yahoo.de>
AuthorDate: Fri Jan 20 12:08:13 2023 +0100
Commit:     Eike Rathke <er...@redhat.com>
CommitDate: Sat Feb 18 20:05:49 2023 +0000

    tdf#82254 - Don't remove UTF-8 BOM from CSV when saving file
    
    Don't remove the byte-order-mark in the resulting CSV file
    when it was present in the CSV source file.
    
    Change-Id: Id26abad2686917f320f2ace85441621bcf57ea9e
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/145879
    Tested-by: Jenkins
    Reviewed-by: Eike Rathke <er...@redhat.com>

diff --git a/sc/qa/unit/data/csv/testTdf82254-csv-bom.csv b/sc/qa/unit/data/csv/testTdf82254-csv-bom.csv
new file mode 100755
index 000000000000..eabb18da0bf0
--- /dev/null
+++ b/sc/qa/unit/data/csv/testTdf82254-csv-bom.csv
@@ -0,0 +1,3 @@
+col1  col2    col3
+row1-1 row1-2  row1-3
+row2-1 row2-2  row2-3
diff --git a/sc/qa/unit/subsequent_filters_test.cxx b/sc/qa/unit/subsequent_filters_test.cxx
index 10cf129c0412..6af0460170be 100644
--- a/sc/qa/unit/subsequent_filters_test.cxx
+++ b/sc/qa/unit/subsequent_filters_test.cxx
@@ -54,6 +54,8 @@
 #include <undomanager.hxx>
 #include <tabprotection.hxx>
 
+#include <sfx2/docfile.hxx>
+
 #include <orcusfilters.hxx>
 #include <filter.hxx>
 
@@ -196,6 +198,7 @@ public:
     void testImportCrashes();
     void testTdf129681();
     void testTdf149484();
+    void testTdf82254_csv_bom();
     void testEscapedUnicodeXLSX();
     void testTdf144758_DBDataDefaultOrientation();
     void testSharedFormulaXLS();
@@ -321,6 +324,7 @@ public:
     CPPUNIT_TEST(testImportCrashes);
     CPPUNIT_TEST(testTdf129681);
     CPPUNIT_TEST(testTdf149484);
+    CPPUNIT_TEST(testTdf82254_csv_bom);
     CPPUNIT_TEST(testEscapedUnicodeXLSX);
     CPPUNIT_TEST(testTdf144758_DBDataDefaultOrientation);
     CPPUNIT_TEST(testSharedFormulaXLS);
@@ -3203,6 +3207,23 @@ void ScFiltersTest::testTdf149484()
     CPPUNIT_ASSERT_EQUAL(OUString("-TRUE-"), pDoc->GetString(0, 2, 0));
 }
 
+void ScFiltersTest::testTdf82254_csv_bom() // tdf#82254: BOM of a CSV source must survive saving
+{
+    setImportFilterName(SC_TEXT_CSV_FILTER_NAME);
+    createScDoc("csv/testTdf82254-csv-bom.csv"); // fixture is expected to start with a UTF-8 BOM
+    saveAndReload(SC_TEXT_CSV_FILTER_NAME); // round-trip through the CSV filter
+    ScDocShell* pDocSh = getScDocShell();
+    SvStream* pStream = pDocSh->GetMedium()->GetInStream(); // presumably the re-saved file - confirm
+
+    pStream->Seek(0); // rewind so the check starts at the first byte
+    CPPUNIT_ASSERT_EQUAL(sal_uInt64(0), pStream->Tell());
+    pStream->StartReadingUnicodeText(RTL_TEXTENCODING_UTF8); // moves past a BOM if one is present
+    // Without the fix in place, this test would have failed with
+    // - Expected: 3
+    // - Actual  : 0 (no byte order mark was read)
+    CPPUNIT_ASSERT_EQUAL(sal_uInt64(3), pStream->Tell()); // 3 == size of the UTF-8 BOM (EF BB BF)
+}
+
 void ScFiltersTest::testEscapedUnicodeXLSX()
 {
     createScDoc("xlsx/escape-unicode.xlsx");
diff --git a/sc/source/ui/dbgui/asciiopt.cxx b/sc/source/ui/dbgui/asciiopt.cxx
index 933491efbbe5..4234794ea279 100644
--- a/sc/source/ui/dbgui/asciiopt.cxx
+++ b/sc/source/ui/dbgui/asciiopt.cxx
@@ -37,6 +37,7 @@ ScAsciiOptions::ScAsciiOptions() :
     bSkipEmptyCells(false),
     bSaveAsShown(true),
     bSaveFormulas(false),
+    bIncludeBOM(false),
     cTextSep        ( cDefaultTextSep ),
     eCharSet        ( osl_getThreadTextEncoding() ),
     eLang           ( LANGUAGE_SYSTEM ),
@@ -192,6 +193,9 @@ void ScAsciiOptions::ReadFromString( std::u16string_view rString )
     }
     else
         bEvaluateFormulas = true;   // default of versions that didn't add the 
parameter
+
+    // Token 13: include BOM.
+    bIncludeBOM = nPos >= 0 && o3tl::getToken(rString, 0, ',', nPos) == 
u"true";
 }
 
 OUString ScAsciiOptions::WriteToString() const
@@ -261,7 +265,9 @@ OUString ScAsciiOptions::WriteToString() const
                // Token 11: sheet to export, always 0 for current sheet
                ",0," +
                // Token 12: evaluate formulas in import
-               OUString::boolean( bEvaluateFormulas )
+               OUString::boolean( bEvaluateFormulas ) + "," +
+               // Token 13: include BOM
+               OUString::boolean(bIncludeBOM)
             );
     return aOutStr.makeStringAndClear();
 }
diff --git a/sc/source/ui/dbgui/imoptdlg.cxx b/sc/source/ui/dbgui/imoptdlg.cxx
index b285c6ae968e..2777eb9e450b 100644
--- a/sc/source/ui/dbgui/imoptdlg.cxx
+++ b/sc/source/ui/dbgui/imoptdlg.cxx
@@ -47,6 +47,7 @@ ScImportOptions::ScImportOptions( std::u16string_view rStr )
     bRemoveSpace = false;
     nSheetToExport = 0;
     bEvaluateFormulas = true;   // true if not present at all, for 
compatibility
+    bIncludeBOM = false;
     sal_Int32 nTokenCount = comphelper::string::getTokenCount(rStr, ',');
     if ( nTokenCount < 3 )
         return;
@@ -94,6 +95,8 @@ ScImportOptions::ScImportOptions( std::u16string_view rStr )
         if ( nTokenCount >= 13 )
             // If present, defaults to "false".
             bEvaluateFormulas = o3tl::getToken(rStr, 0, ',', nIdx) == u"true";
+        if (nTokenCount >= 14)
+            bIncludeBOM = o3tl::getToken(rStr, 0, ',', nIdx) == u"true";
     }
 }
 
@@ -120,7 +123,9 @@ OUString ScImportOptions::BuildString() const
             "," +
             OUString::number(nSheetToExport) +  // Only available for command 
line --convert-to
             "," +
-            OUString::boolean( bEvaluateFormulas ) ;  // same as "Evaluate 
formulas" in ScAsciiOptions
+            OUString::boolean( bEvaluateFormulas ) +  // same as "Evaluate 
formulas" in ScAsciiOptions
+            "," +
+            OUString::boolean(bIncludeBOM) ;  // same as "Include BOM" in 
ScAsciiOptions
 
     return aResult;
 }
diff --git a/sc/source/ui/docshell/docsh.cxx b/sc/source/ui/docshell/docsh.cxx
index f37b969bd344..4bf9343734a7 100644
--- a/sc/source/ui/docshell/docsh.cxx
+++ b/sc/source/ui/docshell/docsh.cxx
@@ -1301,6 +1301,15 @@ bool ScDocShell::ConvertFrom( SfxMedium& rMedium )
                     sc::SetFormulaDirtyContext aCxt;
                     m_pDocument->SetAllFormulasDirty(aCxt);
 
+                    // tdf#82254 - check whether to include a byte-order-mark 
in the output
+                    if (const bool bIncludeBOM = aImpEx.GetIncludeBOM())
+                    {
+                        aOptions.SetIncludeBOM(bIncludeBOM);
+                        if (rMedium.GetItemSet() != nullptr)
+                            rMedium.GetItemSet()->Put(
+                                SfxStringItem(SID_FILE_FILTEROPTIONS, 
aOptions.WriteToString()));
+                    }
+
                     // for mobile case, we use a copy of the original document 
and give it a temporary name before editing
                     // Therefore, the sheet name becomes ugly, long and 
nonsensical.
 #if !(defined ANDROID)
@@ -1939,6 +1948,7 @@ void ScDocShell::AsciiSave( SvStream& rStream, const ScImportOptions& rAsciiOpt,
     bool bSaveNumberAsSuch = rAsciiOpt.bSaveNumberAsSuch;
     bool bSaveAsShown     = rAsciiOpt.bSaveAsShown;
     bool bShowFormulas    = rAsciiOpt.bSaveFormulas;
+    bool bIncludeBOM      = rAsciiOpt.bIncludeBOM;
 
     rtl_TextEncoding eOldCharSet = rStream.GetStreamCharSet();
     rStream.SetStreamCharSet( eCharSet );
@@ -1955,6 +1965,9 @@ void ScDocShell::AsciiSave( SvStream& rStream, const ScImportOptions& rAsciiOpt,
     }
     else
     {
+        // tdf#82254 - check whether to include a byte-order-mark in the output
+        if (bIncludeBOM && eCharSet == RTL_TEXTENCODING_UTF8)
+            rStream.WriteUChar(0xEF).WriteUChar(0xBB).WriteUChar(0xBF);
         aStrDelimEncoded = OString(&cStrDelim, 1, eCharSet);
         aDelimEncoded = OString(&cDelim, 1, eCharSet);
         rtl_TextEncodingInfo aInfo;
diff --git a/sc/source/ui/docshell/impex.cxx b/sc/source/ui/docshell/impex.cxx
index 1b8d715890b9..87735c2087a3 100644
--- a/sc/source/ui/docshell/impex.cxx
+++ b/sc/source/ui/docshell/impex.cxx
@@ -114,7 +114,7 @@ ScImportExport::ScImportExport( ScDocument& r )
       bFormulas( false ), bIncludeFiltered( true ),
       bAll( true ), bSingle( true ), bUndo( false ),
       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
-      mbApi( true ), mbImportBroadcast(false), mbOverwriting( false )
+      mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), 
mbIncludeBOM(false)
 {
     pUndoDoc = nullptr;
     pExtOptions = nullptr;
@@ -129,7 +129,7 @@ ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt )
       bFormulas( false ), bIncludeFiltered( true ),
       bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
-      mbApi( true ), mbImportBroadcast(false), mbOverwriting( false )
+      mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), 
mbIncludeBOM(false)
 {
     pUndoDoc = nullptr;
     pExtOptions = nullptr;
@@ -145,7 +145,7 @@ ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange )
       bFormulas( false ), bIncludeFiltered( true ),
       bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ),
       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
-      mbApi( true ), mbImportBroadcast(false), mbOverwriting( false )
+      mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), 
mbIncludeBOM(false)
 {
     pUndoDoc = nullptr;
     pExtOptions = nullptr;
@@ -162,7 +162,7 @@ ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos )
       bFormulas( false ), bIncludeFiltered( true ),
       bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
-      mbApi( true ), mbImportBroadcast(false), mbOverwriting( false )
+      mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), 
mbIncludeBOM(false)
 {
     pUndoDoc = nullptr;
     pExtOptions = nullptr;
@@ -1577,6 +1577,9 @@ bool ScImportExport::ExtText2Doc( SvStream& rStrm )
     std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
             ScResId( STR_LOAD_DOC ), nRemaining, true ));
     rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
+    // tdf#82254 - check whether to include a byte-order-mark in the output
+    if (nOldPos != rStrm.Tell())
+        mbIncludeBOM = true;
 
     SCCOL nStartCol = aRange.aStart.Col();
     SCCOL nEndCol = aRange.aEnd.Col();
diff --git a/sc/source/ui/inc/asciiopt.hxx b/sc/source/ui/inc/asciiopt.hxx
index c1b9c33d6ce3..60b9a1425ef2 100644
--- a/sc/source/ui/inc/asciiopt.hxx
+++ b/sc/source/ui/inc/asciiopt.hxx
@@ -37,6 +37,7 @@ private:
     bool        bSkipEmptyCells;
     bool        bSaveAsShown;
     bool        bSaveFormulas;
+    bool        bIncludeBOM;
     sal_Unicode cTextSep;
     rtl_TextEncoding eCharSet;
     LanguageType eLang;
@@ -61,6 +62,7 @@ public:
     bool                IsDetectSpecialNumber() const { return 
bDetectSpecialNumber; }
     bool                IsEvaluateFormulas() const    { return 
bEvaluateFormulas; }
     bool                IsSkipEmptyCells() const      { return 
bSkipEmptyCells; }
+    bool                GetIncludeBOM() const   { return bIncludeBOM; }
     sal_Unicode         GetTextSep() const      { return cTextSep; }
     bool                IsFixedLen() const      { return bFixedLen; }
     sal_uInt16          GetInfoCount() const    { return mvColStart.size(); }
@@ -79,6 +81,7 @@ public:
     void    SetDetectSpecialNumber(bool bSet)   { bDetectSpecialNumber = bSet; 
}
     void    SetEvaluateFormulas(bool bSet)      { bEvaluateFormulas = bSet; }
     void    SetSkipEmptyCells(bool bSet)        { bSkipEmptyCells = bSet; }
+    void    SetIncludeBOM(bool bVal)            { bIncludeBOM = bVal; }
     void    SetTextSep( sal_Unicode c )         { cTextSep = c; }
     void    SetStartRow( sal_Int32 nRow)        { nStartRow= nRow; }
     void    SetLanguage(LanguageType e)         { eLang = e; }
diff --git a/sc/source/ui/inc/imoptdlg.hxx b/sc/source/ui/inc/imoptdlg.hxx
index 2ffe9f4df77a..91bd9e460ae0 100644
--- a/sc/source/ui/inc/imoptdlg.hxx
+++ b/sc/source/ui/inc/imoptdlg.hxx
@@ -32,7 +32,7 @@ public:
             : nFieldSepCode(nFieldSep), nTextSepCode(nTextSep),
             bFixedWidth(false), bSaveAsShown(false), bQuoteAllText(false),
             bSaveNumberAsSuch(true), bSaveFormulas(false), bRemoveSpace(false),
-            bEvaluateFormulas(true), nSheetToExport(0)
+            bEvaluateFormulas(true), bIncludeBOM(false), nSheetToExport(0)
         { SetTextEncoding( nEnc ); }
 
     ScImportOptions& operator=( const ScImportOptions& rCpy ) = default;
@@ -52,6 +52,7 @@ public:
     bool        bSaveFormulas;
     bool        bRemoveSpace;
     bool        bEvaluateFormulas;
+    bool        bIncludeBOM;
     // "0" for 'current sheet', "-1" for all sheets (each to a separate file),
     // or 1-based specific sheet number (to a separate file).
     sal_Int32   nSheetToExport;
diff --git a/sc/source/ui/inc/impex.hxx b/sc/source/ui/inc/impex.hxx
index e11b5d510d61..2631500e98a9 100644
--- a/sc/source/ui/inc/impex.hxx
+++ b/sc/source/ui/inc/impex.hxx
@@ -71,6 +71,7 @@ class SC_DLLPUBLIC ScImportExport
     bool        mbOverwriting;  // Whether we could be overwriting existing 
values (paste).
                                 // In this case we cannot use the insert 
optimization, but we
                                 // do not need to broadcast after the import.
+    bool        mbIncludeBOM; // Whether to include a byte-order-mark in the 
output.
     ScExportTextOptions mExportTextOptions;
 
     std::unique_ptr<ScAsciiOptions> pExtOptions;        // extended options
@@ -158,6 +159,9 @@ public:
     void SetImportBroadcast( bool b ) { mbImportBroadcast = b; }
     void SetOverwriting( const bool bOverwriting ) { mbOverwriting = 
bOverwriting; }
     void SetExportTextOptions( const ScExportTextOptions& options ) { 
mExportTextOptions = options; }
+
+    bool GetIncludeBOM() const { return mbIncludeBOM; }
+    void SetIncludeBOM(bool bVal) { mbIncludeBOM = bVal; }
 };
 
 // Helper class for importing clipboard strings as streams.
diff --git a/sc/source/ui/unoobj/filtuno.cxx b/sc/source/ui/unoobj/filtuno.cxx
index a2b4d0f7e1c3..3831e9634622 100644
--- a/sc/source/ui/unoobj/filtuno.cxx
+++ b/sc/source/ui/unoobj/filtuno.cxx
@@ -231,6 +231,7 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
         rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
 
         OUString aTitle;
+        bool bIncludeBOM = false;
 
         if ( aFilterString == ScDocShell::GetAsciiFilterName() )
         {
@@ -245,6 +246,10 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
 
             aTitle = ScResId( STR_EXPORT_ASCII );
             bAscii = true;
+
+            ScAsciiOptions aOptions;
+            aOptions.ReadFromString(aFilterOptions);
+            bIncludeBOM = aOptions.GetIncludeBOM();
         }
         else if ( aFilterString == ScDocShell::GetLotusFilterName() )
         {
@@ -299,6 +304,7 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
         }
 
         ScImportOptions aOptions( cAsciiDel, cStrDel, eEncoding);
+        aOptions.bIncludeBOM = bIncludeBOM;
         if(skipDialog)
         {
             // TODO: check we are not missing some of the stuff that 
ScImportOptionsDlg::GetImportOptions

Reply via email to