include/svtools/htmlkywd.hxx       |    1 
 include/svtools/htmltokn.h         |    1 
 sc/CppunitTest_sc_filter_html.mk   |   82 +++++++++++++++++++++++++++++++++++++
 sc/Module_sc.mk                    |    1 
 sc/qa/filter/html/data/text.html   |    8 +++
 sc/qa/filter/html/html.cxx         |   62 +++++++++++++++++++++++++++
 sc/source/filter/html/htmlpars.cxx |   22 +++++++++
 svtools/source/svhtml/htmlkywd.cxx |    1 
 8 files changed, 178 insertions(+)

New commits:
commit c378582f965874dbe858fd1a64d6a7f90cf083a2
Author:     Miklos Vajna <[email protected]>
AuthorDate: Thu Feb 1 13:10:26 2024 +0100
Commit:     Caolán McNamara <[email protected]>
CommitDate: Fri Feb 2 18:19:08 2024 +0100

    tdf#159483 sc HTML import: handle data-sheets-value attribute for the text case
    
    The A2 cell in the bugdoc has 01 in it, which was auto-converted to 1
    (float) value on import, even if it was text originally.
    
    This is hard to solve for HTML in general, which is not typed, but this
    input is coming from google sheets, which has an additional
    data-sheets-value attribute on <td> that does tell us about the type of
    the cell.
    
    Fix the problem by handling that attribute, and in case it explicitly
    says it's text, then apply the matching number format.
    
    Other types are not yet handled.
    
    (cherry picked from commit e6e5660b726ecf3b0c39b277568568973b43c9f0)
    
    Change-Id: I2986ef864e97d9c46d191aba25ca5740a1151a71
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162886
    Tested-by: Jenkins CollaboraOffice <[email protected]>
    Reviewed-by: Caolán McNamara <[email protected]>

diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx
index cdcee168acba..1c0360404ced 100644
--- a/include/svtools/htmlkywd.hxx
+++ b/include/svtools/htmlkywd.hxx
@@ -445,6 +445,7 @@
 #define OOO_STRING_SVTOOLS_HTML_O_title "title"
 #define OOO_STRING_SVTOOLS_HTML_O_value "value"
 #define OOO_STRING_SVTOOLS_HTML_O_SDval "sdval"
+#define OOO_STRING_SVTOOLS_HTML_O_DSval "data-sheets-value"
 #define OOO_STRING_SVTOOLS_HTML_O_SDnum "sdnum"
 #define OOO_STRING_SVTOOLS_HTML_O_sdlibrary "sdlibrary"
 #define OOO_STRING_SVTOOLS_HTML_O_sdmodule "sdmodule"
diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h
index 9dca8a8f3ea7..b18acd6dcb87 100644
--- a/include/svtools/htmltokn.h
+++ b/include/svtools/htmltokn.h
@@ -344,6 +344,7 @@ STRING_START        = BOOL_END,
     TITLE,
     VALUE,
     SDVAL, // StarDiv NumberValue
+    DSVAL,
     SDNUM, // StarDiv NumberFormat
     SDLIBRARY,
     SDMODULE,
diff --git a/sc/CppunitTest_sc_filter_html.mk b/sc/CppunitTest_sc_filter_html.mk
new file mode 100644
index 000000000000..b78349d64703
--- /dev/null
+++ b/sc/CppunitTest_sc_filter_html.mk
@@ -0,0 +1,82 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sc_filter_html, \
+    sc/qa/filter/html/html \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sc_filter_html, \
+       boost_headers \
+       libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sc_filter_html, \
+    basegfx \
+    comphelper \
+    cppu \
+    cppuhelper \
+    drawinglayer \
+    drawinglayercore \
+    editeng \
+    for \
+    forui \
+    i18nlangtag \
+    msfilter \
+    oox \
+    sal \
+    salhelper \
+    sax \
+    sc \
+    scqahelper \
+    sfx \
+    sot \
+    subsequenttest \
+    svl \
+    svt \
+    svx \
+    svxcore \
+       test \
+    tk \
+    tl \
+    ucbhelper \
+       unotest \
+    utl \
+    vcl \
+    xo \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sc_filter_html,\
+    -I$(SRCDIR)/sc/source/ui/inc \
+    -I$(SRCDIR)/sc/inc \
+    $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sc_filter_html,\
+       udkapi \
+       offapi \
+       oovbaapi \
+))
+
+$(eval $(call gb_CppunitTest_use_packages,sc_filter_html, \
+    filter_xhtml \
+    filter_xslt \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sc_filter_html))
+$(eval $(call gb_CppunitTest_use_vcl,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_use_rdb,sc_filter_html,services))
+
+$(eval $(call gb_CppunitTest_use_configuration,sc_filter_html))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sc/Module_sc.mk b/sc/Module_sc.mk
index d8017dd6f48a..cce6ad096980 100644
--- a/sc/Module_sc.mk
+++ b/sc/Module_sc.mk
@@ -90,6 +90,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sc, \
        CppunitTest_sc_uicalc \
        CppunitTest_sc_vba_macro_test \
        CppunitTest_sc_a11y \
+       CppunitTest_sc_filter_html \
 ))
 
 ifneq ($(ENABLE_JUMBO_SHEETS),)
diff --git a/sc/qa/filter/html/data/text.html b/sc/qa/filter/html/data/text.html
new file mode 100644
index 000000000000..eadb34b5e1f8
--- /dev/null
+++ b/sc/qa/filter/html/data/text.html
@@ -0,0 +1,8 @@
+<table>
+  <tr>
+    <td data-sheets-value="{&quot;1&quot;:3,&quot;3&quot;:1}">1</td>
+  </tr>
+  <tr>
+    <td 
data-sheets-value="{&quot;1&quot;:2,&quot;2&quot;:&quot;01&quot;,&quot;6&quot;:1}">01</td>
+  </tr>
+</table>
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
new file mode 100644
index 000000000000..76413c6455b4
--- /dev/null
+++ b/sc/qa/filter/html/html.cxx
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <test/unoapixml_test.hxx>
+#include <test/htmltesttools.hxx>
+
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
+#include <com/sun/star/table/XCellRange.hpp>
+
+#include <comphelper/propertyvalue.hxx>
+
+using namespace com::sun::star;
+
+namespace
+{
+/// Covers sc/source/filter/html/ fixes; the fixture's data directory is sc/qa/filter/html/data/.
+class Test : public UnoApiXmlTest, public HtmlTestTools
+{
+public:
+    Test()
+        : UnoApiXmlTest("/sc/qa/filter/html/data/")
+    {
+    }
+};
+
+CPPUNIT_TEST_FIXTURE(Test, testTdAsText)
+{
+    // Given a document with an A2 cell that contains "01" as text:
+    OUString aURL = createFileURL(u"text.html");
+
+    // When loading that document to Calc:
+    uno::Sequence<beans::PropertyValue> aParams = {
+        comphelper::makePropertyValue("DocumentService",
+                                      
OUString("com.sun.star.sheet.SpreadsheetDocument")),
+    };
+    loadWithParams(aURL, aParams);
+
+    // Then make sure "01" is not auto-converted to 1, as a number:
+    uno::Reference<sheet::XSpreadsheetDocument> xDocument(mxComponent, 
uno::UNO_QUERY);
+    uno::Reference<container::XIndexAccess> xSheets(xDocument->getSheets(), 
uno::UNO_QUERY);
+    uno::Reference<table::XCellRange> xSheet(xSheets->getByIndex(0), 
uno::UNO_QUERY);
+    uno::Reference<beans::XPropertySet> xCell(xSheet->getCellByPosition(0, 1), 
uno::UNO_QUERY);
+    table::CellContentType eType{};
+    xCell->getPropertyValue("CellContentType") >>= eType;
+    // Without the accompanying fix in place, this test would have failed with:
+    // - Expected: 2 (TEXT)
+    // - Actual  : 1 (VALUE)
+    // i.e. data-sheets-value was ignored on import.
+    CPPUNIT_ASSERT_EQUAL(table::CellContentType_TEXT, eType);
+}
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index e8abde6c92d7..6a6a203dfdf9 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -63,6 +63,7 @@
 #include <rangelst.hxx>
 
 #include <orcus/css_parser.hpp>
+#include <boost/property_tree/json_parser.hpp>
 
 #include <com/sun/star/document/XDocumentProperties.hpp>
 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
@@ -2126,6 +2127,27 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
                     }
                 }
                 break;
+                case HtmlOptionId::DSVAL:
+                {
+                    // data-sheets-value from google sheets, value is a JSON.
+                    OString aEncodedOption = rOption.GetString().toUtf8();
+                    const char* pEncodedOption = aEncodedOption.getStr();
+                    std::stringstream aStream(pEncodedOption);
+                    boost::property_tree::ptree aTree;
+                    boost::property_tree::read_json(aStream, aTree);
+                    // The "1" key describes the original data type.
+                    auto it = aTree.find("1");
+                    if (it != aTree.not_found())
+                    {
+                        int nValueType = 
std::stoi(it->second.get_value<std::string>());
+                        // 2 is text.
+                        if (nValueType == 2)
+                        {
+                            nNumberFormat = NF_STANDARD_FORMAT_TEXT;
+                        }
+                    }
+                }
+                break;
                 default: break;
             }
         }
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx
index 5f81b3e3ca30..2bdd3e897b30 100644
--- a/svtools/source/svhtml/htmlkywd.cxx
+++ b/svtools/source/svhtml/htmlkywd.cxx
@@ -524,6 +524,7 @@ static HTML_OptionEntry aHTMLOptionTab[] = {
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_title),     
HtmlOptionId::TITLE},
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_value),     
HtmlOptionId::VALUE},
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDval),     
HtmlOptionId::SDVAL}, // StarDiv NumberValue
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_DSval),     
HtmlOptionId::DSVAL},
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDnum),     
HtmlOptionId::SDNUM}, // StarDiv NumberFormat
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdlibrary), 
HtmlOptionId::SDLIBRARY},
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdmodule),  
HtmlOptionId::SDMODULE},

Reply via email to