[LyX/master] Improve BibTeX name parsing #3

Juergen Spitzmueller Sun, 19 Mar 2017 04:43:38 -0700

commit 5fdcca4c0615926ae69df84ecf416dce2ad0f718
Author: Juergen Spitzmueller <[email protected]>
Date:   Sun Mar 19 12:42:18 2017 +0100


    Improve BibTeX name parsing #3
    
    Correctly handle name suffix ("Jr.-part")
---
 lib/citeengines/basic.citeengine |    4 +-
 lib/layouts/stdciteformats.inc   |    8 ++--
 src/BiblioInfo.cpp               |   65 ++++++++++++++++++++++++++++----------
 3 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/lib/citeengines/basic.citeengine b/lib/citeengines/basic.citeengine
index a37a2b7..cce91d7 100644
--- a/lib/citeengines/basic.citeengine
+++ b/lib/citeengines/basic.citeengine
@@ -60,9 +60,9 @@ CiteFormat default
        !sep ,
        !close ]
        # Modify scheme of the first author in the bibliography
-       !firstnameform %prename% %surname%
+       !firstnameform %prename% %surname%{%junior%[[, %junior%]]}
        # Modify scheme of other authors in the bibliography
-       !othernameform %prename% %surname%
+       !othernameform %prename% %surname%{%junior%[[, %junior%]]}
 
        # A link that lets us jump to the bibliography entry in LyXHTML
        # %clean:key% will be substituted by the cite key to give a unique id
diff --git a/lib/layouts/stdciteformats.inc b/lib/layouts/stdciteformats.inc
index 67530bf..da359fa 100644
--- a/lib/layouts/stdciteformats.inc
+++ b/lib/layouts/stdciteformats.inc
@@ -32,13 +32,13 @@ CiteFormat default
        # Macros
        #
        # Scheme of the first author in the bibliography
-       !firstnameform %surname%{%prename%[[, %prename%]]}
+       !firstnameform %surname%{%junior%[[, %junior%]]}{%prename%[[, %prename%]]}
        # Scheme of other authors in the bibliography
-       !othernameform %surname%{%prename%[[, %prename%]]}
+       !othernameform %surname%{%junior%[[, %junior%]]}{%prename%[[, %prename%]]}
        # Scheme of the first name in later parts (such as book editor)
-       !firstbynameform %prename% %surname%
+       !firstbynameform %prename% %surname%{%junior%[[, %junior%]]}
        # Scheme of other authors in later parts (such as book editor)
-       !otherbynameform %prename% %surname%
+       !otherbynameform %prename% %surname%{%junior%[[, %junior%]]}
        # pagination
        !pages {%pages%[[, %_pptext% %pages%]]}
        # ed. or eds.
diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp
index 1d15f2f..f9c752f 100644
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@@ -55,11 +55,19 @@ docstring renormalize(docstring const & input)
 }
 
 
+struct name_parts {
+       docstring surname;
+       docstring prename;
+       docstring junior;
+};
+
+
 // gets the "prename" and "family name" from an author-type string
-pair<docstring, docstring> nameParts(docstring const & iname)
+name_parts nameParts(docstring const & iname)
 {
+       name_parts res;
        if (iname.empty())
-               return make_pair(docstring(), docstring());
+               return res;
 
        // First we check for goupings (via {...}) and replace blanks and
        // commas inside groups with temporary placeholders
@@ -85,21 +93,33 @@ pair<docstring, docstring> nameParts(docstring const & iname)
        // Now we look for a comma, and take the last name to be everything
        // preceding the right-most one, so that we also get the "jr" part.
        vector<docstring> pieces = getVectorFromString(name);
-       if (pieces.size() > 1)
+       if (pieces.size() > 1) {
                // whether we have a jr. part or not, it's always
                // the first and last item (reversed)
-               return make_pair(renormalize(pieces.back()), renormalize(pieces.front()));
+               res.surname = renormalize(pieces.front());
+               res.prename = renormalize(pieces.back());
+               // If we have three pieces (the maximum allowed by BibTeX),
+               // the second one is the jr part.
+               if (pieces.size() > 2)
+                       res.junior = renormalize(pieces.at(1));
+               return res;
+       }
 
        // OK, so now we want to look for the last name. We're going to
        // include the "von" part. This isn't perfect.
        // Split on spaces, to get various tokens.
        pieces = getVectorFromString(name, from_ascii(" "));
        // No space: Only a family name given
-       if (pieces.size() < 2)
-               return make_pair(from_ascii(""), renormalize(pieces.back()));
+       if (pieces.size() < 2) {
+               res.surname = renormalize(pieces.back());
+               return res;
+       }
        // If we get two pieces, assume the last one is the last name
-       if (pieces.size() == 2)
-               return make_pair(renormalize(pieces.front()), renormalize(pieces.back()));
+       if (pieces.size() == 2) {
+               res.surname = renormalize(pieces.back());
+               res.prename = renormalize(pieces.front());
+               return res;
+       }
 
        // More than 3 pieces: Now we look for the first piece that
        // begins with a lower case letter (the "von-part").
@@ -141,7 +161,9 @@ pair<docstring, docstring> nameParts(docstring const & iname)
                        first = false;
                surname += *it;
        }
-       return make_pair(renormalize(prename), renormalize(surname));
+       res.surname = renormalize(surname);
+       res.prename = renormalize(prename);
+       return res;
 }
 
 
@@ -149,10 +171,12 @@ docstring constructName(docstring const & name, string const scheme)
 {
        // re-constructs a name from name parts according
        // to a given scheme
-       docstring const prename = nameParts(name).first;
-       docstring const surname = nameParts(name).second;
+       docstring const prename = nameParts(name).prename;
+       docstring const surname = nameParts(name).surname;
+       docstring const junior = nameParts(name).junior;
        string res = scheme;
        static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
+       static regex const reg2("(.*)(\\{%junior%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
        smatch sub;
        if (regex_match(scheme, sub, reg1)) {
                res = sub.str(1);
@@ -160,9 +184,16 @@ docstring constructName(docstring const & name, string const scheme)
                        res += sub.str(3);
                res += sub.str(5);
        }
+       if (regex_match(res, sub, reg2)) {
+               res = sub.str(1);
+               if (!junior.empty())
+                       res += sub.str(3);
+               res += sub.str(5);
+       }
        docstring result = from_ascii(res);
        result = subst(result, from_ascii("%prename%"), prename);
        result = subst(result, from_ascii("%surname%"), surname);
+       result = subst(result, from_ascii("%junior%"), junior);
        return result;
 }
 
@@ -445,15 +476,15 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
                     : " and ";
        string firstnameform =
                        buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
-                            : "%surname%, %prename%";
+                            : "%surname%{%junior%[[, %junior%]]}{%prename%[[, %prename%]]}";
        if (!beginning)
                firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
-                                            : "%prename% %surname%";
+                                            : "%prename% %surname%{%junior%[[, %junior%]]}";
        string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
-                            : "%surname%, %prename%";
+                            : "%surname%{%junior%[[, %junior%]]}{%prename%[[, %prename%]]}";
        if (!beginning)
                othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
-                                            : "%prename% %surname%";
+                                            : "%prename% %surname%{%junior%[[, %junior%]]}";
 
        // Shorten the list (with et al.) if forceshort is set
        // and the list can actually be shortened, else if maxcitenames
@@ -481,13 +512,13 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
                        retval += (i == 0) ? constructName(*it, firstnameform)
                                : constructName(*it, othernameform);
                else
-                       retval += nameParts(*it).second;
+                       retval += nameParts(*it).surname;
        }
        if (shorten) {
                if (allnames)
                        retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
                else
-                       retval = nameParts(authors[0]).second + (buf ? buf->B_(etal) : from_ascii(etal));
+                       retval = nameParts(authors[0]).surname + (buf ? buf->B_(etal) : from_ascii(etal));
        }
 
        return convertLaTeXCommands(retval);

[LyX/master] Improve BibTeX name parsing #3

Reply via email to