commit 08500c1a7c7450975ee3ee4fe7db25730c204ae6
Author: Juergen Spitzmueller <[email protected]>
Date: Sun Mar 19 11:44:22 2017 +0100
Improve BibTeX name parsing #2
Also consider grouping when looking for name separators.
Cases such as {{Barnes and Noble, Inc.}} are now handled correctly.
---
src/BiblioInfo.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp
index 67a3527..6dc976c 100644
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@@ -158,11 +158,57 @@ docstring constructName(docstring const & name, string
const scheme)
}
+vector<docstring> const getAuthors(docstring const & author)
+{
+ // We check for goupings (via {...}) and only consider " and "
+ // outside groups as author separator. This is to account
+ // for cases such as {{Barnes and Noble, Inc.}}, which
+ // need to be treated as one single family name.
+ // We use temporary placeholders in order to differentiate the
+ // diverse " and " cases.
+
+ // First, we temporarily replace all ampersands. It is rather unusual
+ // in author names, but can happen (consider cases such as "C \& A
Corp.").
+ docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
+ // Then, we temporarily make all " and " strings to ampersands in order
+ // to handle them later on a per-char level.
+ iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
+ // Now we traverse through the string and replace the "&" by the proper
+ // output in- and outside groups
+ docstring name;
+ int gl = 0;
+ docstring::const_iterator p = iname.begin();
+ while (p != iname.end()) {
+ // count grouping level
+ if (*p == '{')
+ ++gl;
+ else if (*p == '}')
+ --gl;
+ // generate string with probable placeholders
+ if (*p == '&') {
+ if (gl > 0)
+ // Inside groups, we output "and"
+ name += from_ascii("and");
+ else
+ // Outside groups, we output a separator
+ name += from_ascii("$$namesep!");
+ }
+ else
+ name += *p;
+ ++p;
+ }
+
+ // re-insert the literal ampersands
+ name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
+
+ // Now construct the actual vector
+ return getVectorFromString(name, from_ascii(" $$namesep! "));
+}
+
+
bool multipleAuthors(docstring const author)
{
- vector<docstring> const authors =
- getVectorFromString(author, from_ascii(" and "));
- return authors.size() > 1;
+ return getAuthors(author).size() > 1;
}
@@ -366,13 +412,9 @@ docstring const BibTeXInfo::getAuthorList(Buffer const *
buf,
if (author.empty())
return author;
- // FIXME Move this to a separate routine that can
- // be called from elsewhere.
- //
// OK, we've got some names. Let's format them.
- // Try to split the author list on " and "
- vector<docstring> const authors =
- getVectorFromString(author, from_ascii(" and "));
+ // Try to split the author list
+ vector<docstring> const authors = getAuthors(author);
docstring retval;