source

László Németh (via logerrit) Mon, 08 May 2023 10:00:56 -0700

 sw/inc/swscanner.hxx              |    2 
 sw/source/core/txtnode/txtedt.cxx |  125 ++++++++++++++++++++++++++------------
 2 files changed, 88 insertions(+), 39 deletions(-)


New commits:
commit 5619fc438273cd15e78539e78b8af751bca24b1a
Author:     László Németh <nem...@numbertext.org>
AuthorDate: Sun May 7 23:17:26 2023 +0200
Commit:     László Németh <nem...@numbertext.org>
CommitDate: Mon May 8 19:00:28 2023 +0200

    tdf#154499 sw spell checking: add 2-word phrase checking
    
    Hunspell dictionaries can contain phrases, i.e.
    space-separated word sequences, which were used only to
    reject compounds and to give better suggestions. Now
    2-word phrases are recognized in the text, so there is no
    need to break a phrase into single words in the dictionary,
    e.g. "et" and "cetera", which resulted in the acceptance of
    typos (e.g. "et" without "cetera") and in bad suggestions
    (e.g. for "et" and "cetera" independently of the context).
    
    Another example:
    
    == old .dic file ==
    ...
    et
    cetera
    von
    Neumann
    veni
    vidi
    vici
    ...
    
    List the 2-word phrases as they are, and break phrases of
    3 or more words into overlapping 2-word phrases:
    
    == new .dic file ==
    ...
    et cetera
    von Neumann
    veni vidi
    vidi vici
    ...
    
    Note: the words of a phrase are separated by a space in
    the dictionary, but the phrase is also recognized when
    punctuation stands between the words in the text, e.g.
    "Veni, vidi, vici." for the previous example.
    
    Note: during typing, the second word of the phrase
    will be accepted only when the paragraph is ended, i.e.
    when Enter is pressed.
    
    Change-Id: I4a1487abc0e4ab31d09750ee8c817353e6325ca3
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151487
    Tested-by: László Németh <nem...@numbertext.org>
    Reviewed-by: László Németh <nem...@numbertext.org>

diff --git a/sw/inc/swscanner.hxx b/sw/inc/swscanner.hxx
index 5dd118ad0049..88433bfe4bb1 100644
--- a/sw/inc/swscanner.hxx
+++ b/sw/inc/swscanner.hxx
@@ -33,6 +33,7 @@ class SwScanner
 {
     std::function<LanguageType (sal_Int32, sal_Int32, bool)> m_pGetLangOfChar;
     OUString m_aWord;
+    OUString m_aPrevWord;
     const OUString m_aPreDashReplacementText;
     OUString m_aText;
     const LanguageType* m_pLanguage;
@@ -62,6 +63,7 @@ public:
     bool NextWord();
 
     const OUString& GetWord() const    { return m_aWord; }
+    const OUString& GetPrevWord() const { return m_aPrevWord; }
 
     sal_Int32 GetBegin() const         { return m_nBegin; }
     sal_Int32 GetEnd() const           { return m_nBegin + m_nLength; }
diff --git a/sw/source/core/txtnode/txtedt.cxx 
b/sw/source/core/txtnode/txtedt.cxx
index 528afabed561..1e9082201f7a 100644
--- a/sw/source/core/txtnode/txtedt.cxx
+++ b/sw/source/core/txtnode/txtedt.cxx
@@ -961,6 +961,7 @@ bool SwScanner::NextWord()
     if ( m_nWordType == i18n::WordType::WORD_COUNT )
         m_nLength = forceEachAsianCodePointToWord(m_aText, m_nBegin, 
m_nLength);
 
+    m_aPrevWord = m_aWord;
     m_aWord = m_aPreDashReplacementText.copy( m_nBegin, m_nLength );
 
     return true;
@@ -1035,8 +1036,11 @@ bool SwTextNode::Spell(SwSpellArgs* pArgs)
         SwScanner aScanner( *this, m_Text, nullptr, ModelToViewHelper(),
                             WordType::DICTIONARY_WORD,
                             nBegin, nEnd );
-        while( !pArgs->xSpellAlt.is() && aScanner.NextWord() )
+        bool bNextWord = aScanner.NextWord();
+        while( !pArgs->xSpellAlt.is() && bNextWord )
         {
+            bool bCalledNextWord = false;
+
             const OUString& rWord = aScanner.GetWord();
 
             // get next language for next word, consider language attributes
@@ -1066,25 +1070,45 @@ bool SwTextNode::Spell(SwSpellArgs* pArgs)
                     }
                     else
                     {
-                        // make sure the selection build later from the data
-                        // below does not include "in word" character to the
-                        // left and right in order to preserve those. Therefore
-                        // count those "in words" in order to modify the
-                        // selection accordingly.
-                        const sal_Unicode* pChar = rWord.getStr();
-                        sal_Int32 nLeft = 0;
-                        while (*pChar++ == CH_TXTATR_INWORD)
-                            ++nLeft;
-                        pChar = rWord.getLength() ? rWord.getStr() + 
rWord.getLength() - 1 : nullptr;
-                        sal_Int32 nRight = 0;
-                        while (pChar && *pChar-- == CH_TXTATR_INWORD)
-                            ++nRight;
-
-                        pArgs->pStartPos->Assign(*this, aScanner.GetEnd() - 
nRight );
-                        pArgs->pEndPos->Assign(*this, aScanner.GetBegin() + 
nLeft );
+                        OUString sPrevWord = aScanner.GetPrevWord();
+                        auto nWordBegin = aScanner.GetBegin();
+                        auto nWordEnd = aScanner.GetEnd();
+                        bNextWord = aScanner.NextWord();
+                        const OUString& rActualWord = aScanner.GetPrevWord();
+                        bCalledNextWord = true;
+                        // check space separated word pairs in the dictionary, 
e.g. "vice versa"
+                        if ( !((bNextWord && pArgs->xSpeller->isValid( 
rActualWord + " " + aScanner.GetWord(),
+                                static_cast<sal_uInt16>(eActLang), Sequence< 
PropertyValue >() )) ||
+                           ( !sPrevWord.isEmpty() && pArgs->xSpeller->isValid( 
sPrevWord + " " + rActualWord,
+                                static_cast<sal_uInt16>(eActLang), Sequence< 
PropertyValue >() ))) )
+                        {
+                            // make sure the selection build later from the 
data
+                            // below does not include "in word" character to 
the
+                            // left and right in order to preserve those. 
Therefore
+                            // count those "in words" in order to modify the
+                            // selection accordingly.
+                            const sal_Unicode* pChar = 
aScanner.GetPrevWord().getStr();
+                            sal_Int32 nLeft = 0;
+                            while (*pChar++ == CH_TXTATR_INWORD)
+                                ++nLeft;
+                            pChar = rActualWord.getLength() ? 
rActualWord.getStr() + rActualWord.getLength() - 1 : nullptr;
+                            sal_Int32 nRight = 0;
+                            while (pChar && *pChar-- == CH_TXTATR_INWORD)
+                                ++nRight;
+
+                            pArgs->pStartPos->Assign(*this, nWordEnd - nRight 
);
+                            pArgs->pEndPos->Assign(*this, nWordBegin + nLeft );
+                        }
+                        else
+                        {
+                            pArgs->xSpellAlt = nullptr;
+                        }
                     }
                 }
             }
+
+            if ( !bCalledNextWord )
+                bNextWord = aScanner.NextWord();
         }
     }
 
@@ -1342,11 +1366,13 @@ SwRect SwTextFrame::AutoSpell_(SwTextNode & rNode, 
sal_Int32 nActPos)
         SwScanner aScanner( *pNode, pNode->GetText(), nullptr, 
ModelToViewHelper(),
                             WordType::DICTIONARY_WORD, nBegin, nEnd);
 
-        while( aScanner.NextWord() )
+        bool bNextWord = aScanner.NextWord();
+        while( bNextWord )
         {
             const OUString& rWord = aScanner.GetWord();
             nBegin = aScanner.GetBegin();
             sal_Int32 nLen = aScanner.GetLen();
+            bool bCalledNextWord = false;
 
             // get next language for next word, consider language attributes
             // within the word
@@ -1365,31 +1391,49 @@ SwRect SwTextFrame::AutoSpell_(SwTextNode & rNode, 
sal_Int32 nActPos)
                     ((!bRestoreString && !bContainsComments) || 
!xSpell->isValid( rWord.replaceAll(OUStringChar(CH_TXTATR_INWORD), ""),
                             static_cast<sal_uInt16>(eActLang), Sequence< 
PropertyValue >() ) ) )
                 {
-                    sal_Int32 nSmartTagStt = nBegin;
-                    sal_Int32 nDummy = 1;
-                    if ( !pNode->GetSmartTags() || 
!pNode->GetSmartTags()->InWrongWord( nSmartTagStt, nDummy ) )
+                    OUString sPrevWord = aScanner.GetPrevWord();
+                    bNextWord = aScanner.NextWord();
+                    bCalledNextWord = true;
+                    // check space separated word pairs in the dictionary, 
e.g. "vice versa"
+                    if ( !((bNextWord && xSpell->isValid( 
aScanner.GetPrevWord() + " " + aScanner.GetWord(),
+                                static_cast<sal_uInt16>(eActLang), Sequence< 
PropertyValue >() )) ||
+                           (!sPrevWord.isEmpty() && xSpell->isValid( sPrevWord 
+ " " + aScanner.GetPrevWord(),
+                                static_cast<sal_uInt16>(eActLang), Sequence< 
PropertyValue >() ))) )
                     {
-                        if( !pNode->GetWrong() )
-                        {
-                            pNode->SetWrong( std::make_unique<SwWrongList>( 
WRONGLIST_SPELL ) );
-                            pNode->GetWrong()->SetInvalid( 0, nEnd );
-                        }
-                        SwWrongList::FreshState const 
eState(pNode->GetWrong()->Fresh(
-                            nChgStart, nChgEnd, nBegin, nLen, nInsertPos, 
nActPos));
-                        switch (eState)
+                        sal_Int32 nSmartTagStt = nBegin;
+                        sal_Int32 nDummy = 1;
+                        if ( !pNode->GetSmartTags() || 
!pNode->GetSmartTags()->InWrongWord( nSmartTagStt, nDummy ) )
                         {
-                            case SwWrongList::FreshState::FRESH:
-                                pNode->GetWrong()->Insert(OUString(), nullptr, 
nBegin, nLen, nInsertPos++);
-                                break;
-                            case SwWrongList::FreshState::CURSOR:
-                                bPending = true;
-                                [[fallthrough]]; // to mark as invalid
-                            case SwWrongList::FreshState::NOTHING:
-                                nInvStart = nBegin;
-                                nInvEnd = nBegin + nLen;
-                                break;
+                            if( !pNode->GetWrong() )
+                            {
+                                pNode->SetWrong( 
std::make_unique<SwWrongList>( WRONGLIST_SPELL ) );
+                                pNode->GetWrong()->SetInvalid( 0, nEnd );
+                            }
+                            SwWrongList::FreshState const 
eState(pNode->GetWrong()->Fresh(
+                                nChgStart, nChgEnd, nBegin, nLen, nInsertPos, 
nActPos));
+                            switch (eState)
+                            {
+                                case SwWrongList::FreshState::FRESH:
+                                    pNode->GetWrong()->Insert(OUString(), 
nullptr, nBegin, nLen, nInsertPos++);
+                                    break;
+                                case SwWrongList::FreshState::CURSOR:
+                                    bPending = true;
+                                    [[fallthrough]]; // to mark as invalid
+                                case SwWrongList::FreshState::NOTHING:
+                                    nInvStart = nBegin;
+                                    nInvEnd = nBegin + nLen;
+                                    break;
+                            }
                         }
                     }
+                    else if( bAddAutoCmpl && rACW.GetMinWordLen() <= 
aScanner.GetPrevWord().getLength() )
+                    {
+                        // tdf#119695 only add the word if the cursor position 
is outside the word
+                        // so that the incomplete words are not added as 
autocomplete candidates
+                        bool bCursorOutsideWord = nActPos > nBegin + nLen || 
nActPos < nBegin;
+                        if (bCursorOutsideWord)
+                            rACW.InsertWord(aScanner.GetPrevWord(), rDoc);
+                    }
                 }
                 else if( bAddAutoCmpl && rACW.GetMinWordLen() <= 
rWord.getLength() )
                 {
@@ -1400,6 +1444,9 @@ SwRect SwTextFrame::AutoSpell_(SwTextNode & rNode, 
sal_Int32 nActPos)
                         rACW.InsertWord(rWord, rDoc);
                 }
             }
+
+            if ( !bCalledNextWord )
+                 bNextWord = aScanner.NextWord();
         }
     }

[Libreoffice-commits] core.git: sw/inc sw/source

Reply via email to