cui/inc/strings.hrc                                        |    1 
 cui/source/inc/autocdlg.hxx                                |    2 
 cui/source/tabpages/autocdlg.cxx                           |   15 +++
 editeng/source/misc/acorrcfg.cxx                           |   18 +++
 editeng/source/misc/svxacorr.cxx                           |   18 +++
 editeng/source/misc/swafopt.cxx                            |    1 
 include/editeng/svxacorr.hxx                               |    6 +
 include/editeng/swafopt.hxx                                |    1 
 include/svl/urihelper.hxx                                  |    5 +
 officecfg/registry/schema/org/openoffice/Office/Common.xcs |   10 ++
 officecfg/registry/schema/org/openoffice/Office/Writer.xcs |    8 +
 svl/qa/unit/test_URIHelper.cxx                             |   54 +++++++++++
 svl/source/misc/urihelper.cxx                              |   59 +++++++++++++
 sw/inc/comcore.hxx                                         |    3 
 sw/inc/utlui.hrc                                           |    3 
 sw/source/core/edit/autofmt.cxx                            |    9 +
 sw/source/uibase/docvw/edtwin.cxx                          |    5 -
 sw/source/uibase/shells/textsh.cxx                         |    3 
 sw/source/uibase/wrtsh/wrtsh1.cxx                          |    3 
 19 files changed, 210 insertions(+), 14 deletions(-)

New commits:
commit a772976f047882918d5386a3ef9226c4aa2aa118
Author:     Baole Fang <baole.f...@gmail.com>
AuthorDate: Tue Apr 25 00:33:01 2023 -0400
Commit:     Stephan Bergmann <sberg...@redhat.com>
CommitDate: Thu Apr 27 15:27:33 2023 +0200

    tdf#145925: Add DOI recognition
    
    Detect DOI string in the form of "doi:10.*" and add hyperlink to it.
    It works the same way as url recognition.
    
    Change-Id: I3c4e78a110fd81ad7e727d5e9acee7e51127466a
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/150954
    Tested-by: Jenkins
    Reviewed-by: Heiko Tietze <heiko.tie...@documentfoundation.org>
    Reviewed-by: Stephan Bergmann <sberg...@redhat.com>

diff --git a/cui/inc/strings.hrc b/cui/inc/strings.hrc
index 498e60e75bc2..442ac515ef8c 100644
--- a/cui/inc/strings.hrc
+++ b/cui/inc/strings.hrc
@@ -332,6 +332,7 @@
 #define RID_CUISTR_BOLD_UNDER                       
NC_("RID_SVXSTR_BOLD_UNDER", "Automatic *bold*, /italic/, -strikeout- and 
_underline_")
 #define RID_CUISTR_NO_DBL_SPACES                    
NC_("RID_SVXSTR_NO_DBL_SPACES", "Ignore double spaces")
 #define RID_CUISTR_DETECT_URL                       
NC_("RID_SVXSTR_DETECT_URL", "URL Recognition")
+#define RID_CUISTR_DETECT_DOI                       
NC_("RID_SVXSTR_DETECT_DOI", "DOI citation recognition")
 #define RID_CUISTR_DASH                             NC_("RID_SVXSTR_DASH", 
"Replace dashes")
 #define RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK     
NC_("RID_SVXSTR_CORRECT_ACCIDENTAL_CAPS_LOCK", "Correct accidental use of cAPS 
LOCK key")
 #define RID_CUISTR_NON_BREAK_SPACE                  
NC_("RID_SVXSTR_NON_BREAK_SPACE", "Add non-breaking space before specific 
punctuation marks in French text")
diff --git a/cui/source/inc/autocdlg.hxx b/cui/source/inc/autocdlg.hxx
index 7b4842314779..2357677ef65f 100644
--- a/cui/source/inc/autocdlg.hxx
+++ b/cui/source/inc/autocdlg.hxx
@@ -57,6 +57,7 @@ private:
     OUString m_sStartCap;
     OUString m_sBoldUnderline;
     OUString m_sURL;
+    OUString m_sDOI;
     OUString m_sNoDblSpaces;
     OUString m_sDash;
     OUString m_sAccidentalCaps;
@@ -92,6 +93,7 @@ class OfaSwAutoFmtOptionsPage : public SfxTabPage
     OUString        sNoDblSpaces;
     OUString        sCorrectCapsLock;
     OUString        sDetectURL;
+    OUString        sDetectDOI;
     OUString        sDash;
     OUString        sRightMargin;
     OUString        sNum;
diff --git a/cui/source/tabpages/autocdlg.cxx b/cui/source/tabpages/autocdlg.cxx
index aea3f7d9727e..38a261ad5737 100644
--- a/cui/source/tabpages/autocdlg.cxx
+++ b/cui/source/tabpages/autocdlg.cxx
@@ -189,6 +189,7 @@ 
OfaAutocorrOptionsPage::OfaAutocorrOptionsPage(weld::Container* pPage, weld::Dia
     , m_sStartCap(CuiResId(RID_CUISTR_CPTL_STT_SENT))
     , m_sBoldUnderline(CuiResId(RID_CUISTR_BOLD_UNDER))
     , m_sURL(CuiResId(RID_CUISTR_DETECT_URL))
+    , m_sDOI(CuiResId(RID_CUISTR_DETECT_DOI))
     , m_sNoDblSpaces(CuiResId(RID_CUISTR_NO_DBL_SPACES))
     , m_sDash(CuiResId(RID_CUISTR_DASH))
     , m_sAccidentalCaps(CuiResId(RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK))
@@ -223,6 +224,7 @@ bool OfaAutocorrOptionsPage::FillItemSet( SfxItemSet* )
     pAutoCorrect->SetAutoCorrFlag(ACFlags::CapitalStartSentence, 
m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
     pAutoCorrect->SetAutoCorrFlag(ACFlags::ChgWeightUnderl,      
m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
     pAutoCorrect->SetAutoCorrFlag(ACFlags::SetINetAttr,          
m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
+    pAutoCorrect->SetAutoCorrFlag(ACFlags::SetDOIAttr,           
m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
     pAutoCorrect->SetAutoCorrFlag(ACFlags::ChgToEnEmDash,        
m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
     pAutoCorrect->SetAutoCorrFlag(ACFlags::IgnoreDoubleSpace,    
m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
     pAutoCorrect->SetAutoCorrFlag(ACFlags::CorrectCapsLock,      
m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
@@ -263,6 +265,7 @@ void OfaAutocorrOptionsPage::Reset( const SfxItemSet* )
     InsertEntry(m_sStartCap);
     InsertEntry(m_sBoldUnderline);
     InsertEntry(m_sURL);
+    InsertEntry(m_sDOI);
     InsertEntry(m_sDash);
     InsertEntry(m_sNoDblSpaces);
     InsertEntry(m_sAccidentalCaps);
@@ -273,6 +276,7 @@ void OfaAutocorrOptionsPage::Reset( const SfxItemSet* )
     m_xCheckLB->set_toggle( nPos++, bool(nFlags & 
ACFlags::CapitalStartSentence) ? TRISTATE_TRUE : TRISTATE_FALSE );
     m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::ChgWeightUnderl) ? 
TRISTATE_TRUE : TRISTATE_FALSE );
     m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::SetINetAttr) ? 
TRISTATE_TRUE : TRISTATE_FALSE );
+    m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::SetDOIAttr) ? 
TRISTATE_TRUE : TRISTATE_FALSE );
     m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::ChgToEnEmDash) ? 
TRISTATE_TRUE : TRISTATE_FALSE );
     m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::IgnoreDoubleSpace) 
? TRISTATE_TRUE : TRISTATE_FALSE );
     m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::CorrectCapsLock) ? 
TRISTATE_TRUE : TRISTATE_FALSE );
@@ -333,6 +337,7 @@ enum OfaAutoFmtOptions
     BEGIN_UPPER,
     BOLD_UNDERLINE,
     DETECT_URL,
+    DETECT_DOI,
     REPLACE_DASHES,
     DEL_SPACES_AT_STT_END,
     DEL_SPACES_BETWEEN_LINES,
@@ -363,6 +368,7 @@ 
OfaSwAutoFmtOptionsPage::OfaSwAutoFmtOptionsPage(weld::Container* pPage, weld::D
     , sNoDblSpaces(CuiResId(RID_CUISTR_NO_DBL_SPACES))
     , sCorrectCapsLock(CuiResId(RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK))
     , sDetectURL(CuiResId(RID_CUISTR_DETECT_URL))
+    , sDetectDOI(CuiResId(RID_CUISTR_DETECT_DOI))
     , sDash(CuiResId(RID_CUISTR_DASH))
     , sRightMargin(CuiResId(RID_CUISTR_RIGHT_MARGIN))
     , sNum(CuiResId(RID_CUISTR_NUM))
@@ -455,6 +461,12 @@ bool OfaSwAutoFmtOptionsPage::FillItemSet( SfxItemSet*  )
     pAutoCorrect->SetAutoCorrFlag(ACFlags::SetINetAttr,
                         m_xCheckLB->get_toggle(DETECT_URL, CBCOL_SECOND) == 
TRISTATE_TRUE);
 
+    bCheck = m_xCheckLB->get_toggle(DETECT_DOI, CBCOL_FIRST) == TRISTATE_TRUE;
+    bModified |= pOpt->bSetDOIAttr != bCheck;
+    pOpt->bSetDOIAttr = bCheck;
+    pAutoCorrect->SetAutoCorrFlag(ACFlags::SetDOIAttr,
+                        m_xCheckLB->get_toggle(DETECT_DOI, CBCOL_SECOND) == 
TRISTATE_TRUE);
+
     bCheck = m_xCheckLB->get_toggle(DEL_EMPTY_NODE, CBCOL_FIRST) == 
TRISTATE_TRUE;
     bModified |= pOpt->bDelEmptyNode != bCheck;
     pOpt->bDelEmptyNode = bCheck;
@@ -558,6 +570,7 @@ void OfaSwAutoFmtOptionsPage::Reset( const SfxItemSet* )
     CreateEntry(sCapitalStartSentence, CBCOL_BOTH  );
     CreateEntry(sBoldUnder,         CBCOL_BOTH  );
     CreateEntry(sDetectURL,         CBCOL_BOTH  );
+    CreateEntry(sDetectDOI,         CBCOL_BOTH  );
     CreateEntry(sDash,              CBCOL_BOTH  );
     CreateEntry(sDelSpaceAtSttEnd,  CBCOL_BOTH  );
     CreateEntry(sDelSpaceBetweenLines, CBCOL_BOTH  );
@@ -583,6 +596,8 @@ void OfaSwAutoFmtOptionsPage::Reset( const SfxItemSet* )
     m_xCheckLB->set_toggle(BOLD_UNDERLINE, bool(nFlags & 
ACFlags::ChgWeightUnderl) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
     m_xCheckLB->set_toggle(DETECT_URL, pOpt->bSetINetAttr ? TRISTATE_TRUE : 
TRISTATE_FALSE, CBCOL_FIRST);
     m_xCheckLB->set_toggle(DETECT_URL, bool(nFlags & ACFlags::SetINetAttr) ? 
TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
+    m_xCheckLB->set_toggle(DETECT_DOI, pOpt->bSetDOIAttr ? TRISTATE_TRUE : 
TRISTATE_FALSE, CBCOL_FIRST);
+    m_xCheckLB->set_toggle(DETECT_DOI, bool(nFlags & ACFlags::SetDOIAttr) ? 
TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
     m_xCheckLB->set_toggle(REPLACE_DASHES, pOpt->bChgToEnEmDash ? 
TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
     m_xCheckLB->set_toggle(REPLACE_DASHES, bool(nFlags & 
ACFlags::ChgToEnEmDash) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
     m_xCheckLB->set_toggle(DEL_SPACES_AT_STT_END, 
pOpt->bAFormatDelSpacesAtSttEnd ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
diff --git a/editeng/source/misc/acorrcfg.cxx b/editeng/source/misc/acorrcfg.cxx
index 787663a96f22..8603b4347da1 100644
--- a/editeng/source/misc/acorrcfg.cxx
+++ b/editeng/source/misc/acorrcfg.cxx
@@ -185,9 +185,10 @@ Sequence<OUString>  SvxBaseAutoCorrCfg::GetPropertyNames()
         "DoubleQuoteAtEnd",                     // 16
         "CorrectAccidentalCapsLock",            // 17
         "TransliterateRTL",                     // 18
-        "ChangeAngleQuotes"                     // 19
+        "ChangeAngleQuotes",                    // 19
+        "SetDOIAttribute",                      // 20
     };
-    const int nCount = 20;
+    const int nCount = 21;
     Sequence<OUString> aNames(nCount);
     OUString* pNames = aNames.getArray();
     for(int i = 0; i < nCount; i++)
@@ -298,6 +299,10 @@ void SvxBaseAutoCorrCfg::Load(bool bInit)
                     if(*o3tl::doAccess<bool>(pValues[nProp]))
                         nFlags |= ACFlags::ChgAngleQuotes;
                 break;//"ChangeAngleQuotes"
+                case  20:
+                    if(*o3tl::doAccess<bool>(pValues[nProp]))
+                        nFlags |= ACFlags::SetDOIAttr;
+                break;//"SetDOIAttr",
             }
         }
     }
@@ -333,6 +338,7 @@ void SvxBaseAutoCorrCfg::ImplCommit()
          css::uno::Any(bool(nFlags & ACFlags::ChgWeightUnderl)),
             // "ChangeUnderlineWeight"
          css::uno::Any(bool(nFlags & ACFlags::SetINetAttr)), // 
"SetInetAttribute"
+         css::uno::Any(bool(nFlags & ACFlags::SetDOIAttr)), // "SetDOIAttr"
          css::uno::Any(bool(nFlags & ACFlags::ChgOrdinalNumber)),
             // "ChangeOrdinalNumber"
          css::uno::Any(bool(nFlags & ACFlags::AddNonBrkSpace)), // 
"AddNonBreakingSpace"
@@ -414,8 +420,9 @@ Sequence<OUString>  SvxSwAutoCorrCfg::GetPropertyNames()
         "Format/ByInput/ApplyNumbering/SpecialCharacter/FontFamily",    //44
         "Format/ByInput/ApplyNumbering/SpecialCharacter/FontCharset",   //45
         "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch",     //46
+        "Format/Option/SetDOIAttribute",                                //47
     };
-    const int nCount = 47;
+    const int nCount = 48;
     Sequence<OUString> aNames(nCount);
     OUString* pNames = aNames.getArray();
     for(int i = 0; i < nCount; i++)
@@ -565,6 +572,7 @@ void SvxSwAutoCorrCfg::Load(bool bInit)
                     rSwFlags.aByInputBulletFont.SetPitch(FontPitch(nVal));
                 }
                 break;// 
"Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch",
+                case   47: rSwFlags.bSetDOIAttr = 
*o3tl::doAccess<bool>(pValues[nProp]); break; // 
"Format/Option/SetDOIAttribute",
             }
         }
     }
@@ -666,8 +674,10 @@ void SvxSwAutoCorrCfg::ImplCommit()
             // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontFamily"
          css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetCharSet())),
             // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontCharset"
-         css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetPitch()))});
+         css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetPitch())),
             // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch"
+         css::uno::Any(rSwFlags.bSetDOIAttr)});
+            // "Format/Option/SetDOIAttribute"
 }
 
 void SvxSwAutoCorrCfg::Notify( const Sequence<OUString>& /* aPropertyNames */ )
diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx
index 5e229ba10d24..68743034e66b 100644
--- a/editeng/source/misc/svxacorr.cxx
+++ b/editeng/source/misc/svxacorr.cxx
@@ -289,6 +289,7 @@ ACFlags SvxAutoCorrect::GetDefaultFlags()
                     | ACFlags::ChgAngleQuotes
                     | ACFlags::ChgWeightUnderl
                     | ACFlags::SetINetAttr
+                    | ACFlags::SetDOIAttr
                     | ACFlags::ChgQuotes
                     | ACFlags::SaveWordCplSttLst
                     | ACFlags::SaveWordWordStartLst
@@ -752,6 +753,18 @@ bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, 
const OUString& rTxt,
     return bRet;
 }
 
+// DOI citation recognition
+bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
+                                    LanguageType eLang )
+{
+    OUString sURL( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
+    bool bRet = !sURL.isEmpty();
+    if( bRet )          // so, set attribute:
+        rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
+    return bRet;
+}
+
 // Automatic *bold*, /italic/, -strikeout- and _underline_
 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& 
rTxt,
                                         sal_Int32 nEndPos )
@@ -1609,7 +1622,10 @@ void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& 
rDoc, const OUString& rTxt,
                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) 
) ||
             ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
-                FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
+                FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
+            ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
+                ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
+                FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
             ;
         else
         {
diff --git a/editeng/source/misc/swafopt.cxx b/editeng/source/misc/swafopt.cxx
index 293554589438..f6de4098d2e4 100644
--- a/editeng/source/misc/swafopt.cxx
+++ b/editeng/source/misc/swafopt.cxx
@@ -36,6 +36,7 @@ SvxSwAutoFormatFlags::SvxSwAutoFormatFlags()
     bChgToEnEmDash =
     bChgWeightUnderl =
     bSetINetAttr =
+    bSetDOIAttr =
     bAFormatDelSpacesAtSttEnd =
     bAFormatDelSpacesBetweenLines =
     bAFormatByInpDelSpacesAtSttEnd =
diff --git a/include/editeng/svxacorr.hxx b/include/editeng/svxacorr.hxx
index 39a3d4c65f81..eddf8c9296a1 100644
--- a/include/editeng/svxacorr.hxx
+++ b/include/editeng/svxacorr.hxx
@@ -76,13 +76,14 @@ enum class ACFlags : sal_uInt32 {
     CorrectCapsLock      = 0x00002000,   // Correct accidental use of cAPS 
LOCK key
     TransliterateRTL     = 0x00004000,   // Transliterate RTL text
     ChgAngleQuotes       = 0x00008000,   // >>, << -> angle quotes in some 
languages
+    SetDOIAttr           = 0x00010000,   // Set DOIAttribut
 
     ChgWordLstLoad       = 0x20000000,   // Replacement list loaded
     CplSttLstLoad        = 0x40000000,   // Exception list for Capital letters 
Start loaded
     WordStartLstLoad        = 0x80000000,   // Exception list for Word Start 
loaded
 };
 namespace o3tl {
-    template<> struct typed_flags<ACFlags> : is_typed_flags<ACFlags, 
0xe000ffff> {};
+    template<> struct typed_flags<ACFlags> : is_typed_flags<ACFlags, 
0xe001ffff> {};
 }
 
 enum class ACQuotes
@@ -416,6 +417,9 @@ public:
     bool FnSetINetAttr( SvxAutoCorrDoc&, const OUString&,
                                 sal_Int32 nSttPos, sal_Int32 nEndPos,
                                 LanguageType eLang );
+    bool FnSetDOIAttr( SvxAutoCorrDoc&, const OUString&,
+                                sal_Int32 nSttPos, sal_Int32 nEndPos,
+                                LanguageType eLang );
     bool FnChgWeightUnderl( SvxAutoCorrDoc&, const OUString&,
                                 sal_Int32 nEndPos );
     void FnCapitalStartSentence( SvxAutoCorrDoc&, const OUString&, bool 
bNormalPos,
diff --git a/include/editeng/swafopt.hxx b/include/editeng/swafopt.hxx
index 180ba6d1be67..71919383da96 100644
--- a/include/editeng/swafopt.hxx
+++ b/include/editeng/swafopt.hxx
@@ -109,6 +109,7 @@ struct EDITENG_DLLPUBLIC SvxSwAutoFormatFlags
     bool bChgAngleQuotes : 1;
     bool bChgWeightUnderl : 1;
     bool bSetINetAttr : 1;
+    bool bSetDOIAttr : 1;
 
     bool bSetBorder : 1;
     bool bCreateTable : 1;
diff --git a/include/svl/urihelper.hxx b/include/svl/urihelper.hxx
index 9f8588c97295..68843c17a85c 100644
--- a/include/svl/urihelper.hxx
+++ b/include/svl/urihelper.hxx
@@ -120,6 +120,11 @@ SVL_DLLPUBLIC OUString FindFirstURLInText(OUString const & 
rText,
                                           INetURLObject::EncodeMechanism 
eMechanism = INetURLObject::EncodeMechanism::WasEncoded,
                                           rtl_TextEncoding eCharset = 
RTL_TEXTENCODING_UTF8);
 
+SVL_DLLPUBLIC OUString FindFirstDOIInText(OUString const & rText,
+                                          sal_Int32 & rBegin,
+                                          sal_Int32 & rEnd,
+                                          CharClass const & rCharClass);
+
 /** Remove any password component from both absolute and relative URLs.
 
     @ATT  The current implementation will not remove a password from a
diff --git a/officecfg/registry/schema/org/openoffice/Office/Common.xcs 
b/officecfg/registry/schema/org/openoffice/Office/Common.xcs
index 9beda4a62c81..3bba44384b37 100644
--- a/officecfg/registry/schema/org/openoffice/Office/Common.xcs
+++ b/officecfg/registry/schema/org/openoffice/Office/Common.xcs
@@ -1335,6 +1335,16 @@
         </info>
         <value>true</value>
       </prop>
+      <prop oor:name="SetDOIAttribute" oor:type="xs:boolean" 
oor:nillable="false">
+        <!-- UIHints: Tools  AutoCorrect/AutoFormat  Options - DOI
+             recognition -->
+        <info>
+          <desc>Specifies if character strings which could represent a DOI
+          should be converted to a hyperlink.</desc>
+          <label>Detect DOI</label>
+        </info>
+        <value>true</value>
+      </prop>
       <prop oor:name="ChangeOrdinalNumber" oor:type="xs:boolean" 
oor:nillable="false">
         <!-- UIHints: Tools  AutoCorrect/AutoFormat  Options - Replace
              1st... -->
diff --git a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs 
b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
index bc84344942f4..7f0f55de7147 100644
--- a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
+++ b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
@@ -4173,6 +4173,14 @@
             </info>
             <value>true</value>
           </prop>
+          <prop oor:name="SetDOIAttribute" oor:type="xs:boolean" 
oor:nillable="false">
+            <!-- UIHints: Tools  AutoCorrect/AutoFormat  Options - DOI 
citation recognition -->
+            <info>
+              <desc>Specifies whether character strings which could represent 
a DOI are converted to a hyperlink.</desc>
+              <label>Detect DOI</label>
+            </info>
+            <value>true</value>
+          </prop>
           <prop oor:name="ChangeOrdinalNumber" oor:type="xs:boolean" 
oor:nillable="false">
             <!-- UIHints: Tools  AutoCorrect/AutoFormat  Options - Replace 
1st... -->
             <info>
diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx
index eb5135cbe3c6..df9e5d5114b8 100644
--- a/svl/qa/unit/test_URIHelper.cxx
+++ b/svl/qa/unit/test_URIHelper.cxx
@@ -181,11 +181,14 @@ public:
 
     void testFindFirstURLInText();
 
+    void testFindFirstDOIInText();
+
     void testResolveIdnaHost();
 
     CPPUNIT_TEST_SUITE(Test);
     CPPUNIT_TEST(testNormalizedMakeRelative);
     CPPUNIT_TEST(testFindFirstURLInText);
+    CPPUNIT_TEST(testFindFirstDOIInText);
     CPPUNIT_TEST(testResolveIdnaHost);
     CPPUNIT_TEST(finish);
     CPPUNIT_TEST_SUITE_END();
@@ -398,6 +401,57 @@ void Test::testFindFirstURLInText() {
     }
 }
 
+void Test::testFindFirstDOIInText() {
+    struct Data {
+        char const * input;
+        char const * result;
+        sal_Int32 begin;
+        sal_Int32 end;
+    };
+    static Data const tests[] = {
+        { "doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with only digits
+        { "doi:10.1038/nature03001", "https://doi.org/10.1038/nature03001", 0, 23 }, // valid doi suffix with alphanumeric characters
+        { "doi:10.1093/ajae/aaq063", "https://doi.org/10.1093/ajae/aaq063", 0, 23 }, // valid doi suffix with multiple slash
+        { "doi:10.1016/S0735-1097(98)00347-7", "https://doi.org/10.1016/S0735-1097(98)00347-7", 0, 33 }, // valid doi suffix with characters apart from alphanumeric
+        { "doi:10.109/ajae/aaq063", nullptr, 0, 0 }, // # of digits after doi:10. is not between 4 and 9
+        { "doi:10.1234567890/ajae/aaq063", nullptr, 0, 0 }, // # of digits after doi:10. is not between 4 and 9
+        { "doi:10.1093/ajae/aaq063/", nullptr, 0, 0 }, // nothing after slash
+        { "doi:10.1093", nullptr, 0, 0 }, // no slash
+        { "doi:11.1093/ajae/aaq063", nullptr, 0, 0 }, // doesn't begin with doi:10.
+    };
+    CharClass charClass( m_context, LanguageTag( css::lang::Locale("en", "US", "")));
+    for (std::size_t i = 0; i < SAL_N_ELEMENTS(tests); ++i) {
+        OUString input(OUString::createFromAscii(tests[i].input));
+        sal_Int32 begin = 0;
+        sal_Int32 end = input.getLength();
+        OUString result(
+            URIHelper::FindFirstDOIInText(input, begin, end, charClass));
+        bool ok = tests[i].result == nullptr
+            ? (result.getLength() == 0 && begin == input.getLength()
+               && end == input.getLength())
+            : (result.equalsAscii(tests[i].result) && begin == tests[i].begin
+               && end == tests[i].end);
+        OString msg;
+        if (!ok) {
+            OStringBuffer buf;
+            buf.append(OString::Concat("\"")
+                + tests[i].input
+                + "\" -> ");
+            buf.append(tests[i].result == nullptr ? "none" : tests[i].result);
+            buf.append(" ("
+                + OString::number(tests[i].begin)
+                + ", "
+                + OString::number(tests[i].end)
+                + ")"
+                " != "
+                + OUStringToOString(result, RTL_TEXTENCODING_UTF8)
+                + " (" + OString::number(begin) + ", " + OString::number(end) +")");
+            msg = buf.makeStringAndClear();
+        }
+        CPPUNIT_ASSERT_MESSAGE(msg.getStr(), ok);
+    }
+}
+
 void Test::testResolveIdnaHost() {
     OUString input;
 
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 6f121fba56d9..0043b7883a87 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -745,6 +745,65 @@ OUString URIHelper::FindFirstURLInText(OUString const & 
rText,
     return OUString();
 }
 
+OUString URIHelper::FindFirstDOIInText(OUString const & rText,
+                                       sal_Int32 & rBegin,
+                                       sal_Int32 & rEnd,
+                                       CharClass const & rCharClass)
+{
+    if (rBegin > rEnd || rEnd > rText.getLength())
+        return OUString();
+
+    sal_Int32 start = 7;
+    sal_Int32 count = rEnd-rBegin;
+    OUString candidate(rText.subView(rBegin, count));
+    // Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+"
+    if (candidate.startsWith("doi:10."))
+    {
+        bool flag = true;
+        sal_Int32 digit = 0;
+        for (sal_Int32 i=start; i<count; i++)
+        {
+            sal_Unicode c = candidate[i];
+            // Match 4 to 9 digits before slash
+            if (digit >= 0)
+            {
+                if (digit>9)
+                {
+                    flag = false;
+                    break;
+                }
+
+                if ( rCharClass.isDigit(candidate,i) )
+                {
+                    digit++;
+                }
+                else if (c=='/' && digit>=4 && i<count-1)
+                {
+                    digit=-1;
+                }
+                else
+                {
+                    flag = false;
+                    break;
+                }
+            }
+            // Match [-._;()\/:a-zA-Z0-9] after slash
+            else if (!( rCharClass.isAlphaNumeric(candidate, i) || c == '.' || c == '-' || c=='_' ||
+                        c==';' || c=='(' || c==')' || c=='\\' || (c=='/' && i<count-1) || c==':'))
+            {
+                flag = false;
+                break;
+            }
+        }
+        if (flag && digit==-1)
+        {
+            return candidate.replaceFirst("doi:","https://doi.org/");
+        }
+    }
+    rBegin = rEnd;
+    return OUString();
+}
+
 OUString URIHelper::removePassword(OUString const & rURI,
                                    INetURLObject::EncodeMechanism 
eEncodeMechanism,
                                    INetURLObject::DecodeMechanism 
eDecodeMechanism,
diff --git a/sw/inc/comcore.hxx b/sw/inc/comcore.hxx
index 2fda83a3cbc4..3e43536742d2 100644
--- a/sw/inc/comcore.hxx
+++ b/sw/inc/comcore.hxx
@@ -43,8 +43,9 @@
 #define STR_AUTOFMTREDL_DEL_MORELINES           20
 #define STR_AUTOFMTREDL_NON_BREAK_SPACE         21
 #define STR_AUTOFMTREDL_TRANSLITERATE_RTL       22
+#define STR_AUTOFMTREDL_DETECT_DOI              23
 // !!!!!!!!!!!!!!!!!!!!!!!!!!  always set the correct end !!!!!!!!!!!!
-#define STR_AUTOFMTREDL_END                     23
+#define STR_AUTOFMTREDL_END                     24
 
 #endif
 
diff --git a/sw/inc/utlui.hrc b/sw/inc/utlui.hrc
index 2d71c113757d..d5998e143514 100644
--- a/sw/inc/utlui.hrc
+++ b/sw/inc/utlui.hrc
@@ -48,7 +48,8 @@ const TranslateId RID_SHELLRES_AUTOFMTSTRS[] =
     NC_("RID_SHELLRES_AUTOFMTSTRS", "Set \"Bullet\" or \"Numbering\" Style"),
     NC_("RID_SHELLRES_AUTOFMTSTRS", "Combine paragraphs"),
     NC_("RID_SHELLRES_AUTOFMTSTRS", "Add non breaking space"),
-    NC_("RID_SHELLRES_AUTOFMTSTRS", "Transliterates RTL Hungarian text to Old 
Hungarian script")
+    NC_("RID_SHELLRES_AUTOFMTSTRS", "Transliterates RTL Hungarian text to Old 
Hungarian script"),
+    NC_("RID_SHELLRES_AUTOFMTSTRS", "DOI citation recognition")
 };
 
 #endif
diff --git a/sw/source/core/edit/autofmt.cxx b/sw/source/core/edit/autofmt.cxx
index 0f5d4cd307e9..b63f19b24fac 100644
--- a/sw/source/core/edit/autofmt.cxx
+++ b/sw/source/core/edit/autofmt.cxx
@@ -2189,7 +2189,11 @@ void SwAutoFormat::AutoCorrect(TextFrameIndex nPos)
                 ( m_aFlags.bSetINetAttr &&
                     (nPos == TextFrameIndex(pText->getLength()) || 
IsSpace((*pText)[sal_Int32(nPos)])) &&
                     SetRedlineText( STR_AUTOFMTREDL_DETECT_URL ) &&
-                    pATst->FnSetINetAttr(aACorrDoc, *pText, 
sal_Int32(nLastBlank), sal_Int32(nPos), eLang)))
+                    pATst->FnSetINetAttr(aACorrDoc, *pText, 
sal_Int32(nLastBlank), sal_Int32(nPos), eLang)) ||
+                ( m_aFlags.bSetDOIAttr &&
+                    (nPos == TextFrameIndex(pText->getLength()) || 
IsSpace((*pText)[sal_Int32(nPos)])) &&
+                    SetRedlineText( STR_AUTOFMTREDL_DETECT_DOI ) &&
+                    pATst->FnSetDOIAttr(aACorrDoc, *pText, 
sal_Int32(nLastBlank), sal_Int32(nPos), eLang)))
             {
                 nPos = 
m_pCurTextFrame->MapModelToViewPos(*m_aDelPam.GetPoint());
             }
@@ -2779,7 +2783,8 @@ void SwEditShell::AutoFormatBySplitNode()
         SvxAutoCorrect* pACorr = SvxAutoCorrCfg::Get().GetAutoCorrect();
         if( pACorr && !pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | 
ACFlags::CapitalStartWord |
                                 ACFlags::AddNonBrkSpace | 
ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
-                                ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr 
| ACFlags::Autocorrect ))
+                                ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr 
| ACFlags::Autocorrect |
+                                ACFlags::SetDOIAttr ))
             pACorr = nullptr;
 
         if( pACorr )
diff --git a/sw/source/uibase/docvw/edtwin.cxx 
b/sw/source/uibase/docvw/edtwin.cxx
index a15dddf1dec6..5bcbd4a9c843 100644
--- a/sw/source/uibase/docvw/edtwin.cxx
+++ b/sw/source/uibase/docvw/edtwin.cxx
@@ -2570,7 +2570,8 @@ KEYINPUT_CHECKTABLE_INSDEL:
                     pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | 
ACFlags::CapitalStartWord |
                                             ACFlags::ChgOrdinalNumber | 
ACFlags::AddNonBrkSpace |
                                             ACFlags::ChgToEnEmDash | 
ACFlags::SetINetAttr |
-                                            ACFlags::Autocorrect | 
ACFlags::TransliterateRTL ) &&
+                                            ACFlags::Autocorrect | 
ACFlags::TransliterateRTL |
+                                            ACFlags::SetDOIAttr ) &&
                     '\"' != aCh && '\'' != aCh && '*' != aCh && '_' != aCh
                     )
                 {
@@ -2608,7 +2609,7 @@ KEYINPUT_CHECKTABLE_INSDEL:
                 pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | 
ACFlags::CapitalStartWord |
                                         ACFlags::ChgOrdinalNumber | 
ACFlags::TransliterateRTL |
                                         ACFlags::ChgToEnEmDash | 
ACFlags::SetINetAttr |
-                                        ACFlags::Autocorrect ) &&
+                                        ACFlags::Autocorrect | 
ACFlags::SetDOIAttr ) &&
                 !rSh.HasReadonlySel() )
             {
                 FlushInBuffer();
diff --git a/sw/source/uibase/shells/textsh.cxx 
b/sw/source/uibase/shells/textsh.cxx
index 8ecd18555c0e..e7af6d5f3275 100644
--- a/sw/source/uibase/shells/textsh.cxx
+++ b/sw/source/uibase/shells/textsh.cxx
@@ -167,7 +167,8 @@ void SwTextShell::ExecInsert(SfxRequest &rReq)
                 && pACorr->IsAutoCorrFlag(
                     ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
                     ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | 
ACFlags::TransliterateRTL |
-                    ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | 
ACFlags::Autocorrect ) )
+                    ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | 
ACFlags::Autocorrect |
+                    ACFlags::SetDOIAttr ) )
             {
                 rSh.AutoCorrect( *pACorr, cIns );
             }
diff --git a/sw/source/uibase/wrtsh/wrtsh1.cxx 
b/sw/source/uibase/wrtsh/wrtsh1.cxx
index 65f434d45961..0cdd279df379 100644
--- a/sw/source/uibase/wrtsh/wrtsh1.cxx
+++ b/sw/source/uibase/wrtsh/wrtsh1.cxx
@@ -173,7 +173,8 @@ static SvxAutoCorrect* lcl_IsAutoCorr()
     SvxAutoCorrect* pACorr = SvxAutoCorrCfg::Get().GetAutoCorrect();
     if( pACorr && !pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | 
ACFlags::CapitalStartWord |
                             ACFlags::AddNonBrkSpace | 
ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
-                            ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | 
ACFlags::Autocorrect ))
+                            ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | 
ACFlags::Autocorrect |
+                            ACFlags::SetDOIAttr ))
         pACorr = nullptr;
     return pACorr;
 }

Reply via email to