Revision: 22509
          http://sourceforge.net/p/bibdesk/svn/22509
Author:   hofman
Date:     2018-08-22 14:19:07 +0000 (Wed, 22 Aug 2018)
Log Message:
-----------
Replace use of the NSXML library with the WebKit DOM library in web parsers. 
NSXML is buggy: it seems to choke on certain content (comments?) and drops 
empty elements. Using the DOM also means we no longer need to create a separate 
XML document. Some XPath expressions had to change because the DOM only 
supports XPath version 1, rather than 2.

Modified Paths:
--------------
    trunk/bibdesk/BDSKACMDLParser.m
    trunk/bibdesk/BDSKArxivParser.m
    trunk/bibdesk/BDSKAsynchronousWebParser.h
    trunk/bibdesk/BDSKAsynchronousWebParser.m
    trunk/bibdesk/BDSKBibTeXWebParser.m
    trunk/bibdesk/BDSKCOinSParser.m
    trunk/bibdesk/BDSKCiteULikeParser.m
    trunk/bibdesk/BDSKDOIWebParser.m
    trunk/bibdesk/BDSKGoogleScholarParser.m
    trunk/bibdesk/BDSKHCiteParser.m
    trunk/bibdesk/BDSKHubmedParser.m
    trunk/bibdesk/BDSKIACRParser.m
    trunk/bibdesk/BDSKIEEEXploreParser.m
    trunk/bibdesk/BDSKIUCrParser.m
    trunk/bibdesk/BDSKInspireParser.m
    trunk/bibdesk/BDSKJSTORWebParser.m
    trunk/bibdesk/BDSKMASParser.m
    trunk/bibdesk/BDSKMathSciNetParser.m
    trunk/bibdesk/BDSKNumdamParser.m
    trunk/bibdesk/BDSKProjectEuclidParser.m
    trunk/bibdesk/BDSKSIAMParser.m
    trunk/bibdesk/BDSKScienceDirectParser.m
    trunk/bibdesk/BDSKSpringerParser.m
    trunk/bibdesk/BDSKWebParser.h
    trunk/bibdesk/BDSKWebParser.m
    trunk/bibdesk/BDSKZentralblattParser.m
    trunk/bibdesk/DOMNode_BDSKExtensions.h
    trunk/bibdesk/DOMNode_BDSKExtensions.m
    trunk/bibdesk/NSString_BDSKExtensions.h
    trunk/bibdesk/NSString_BDSKExtensions.m

Modified: trunk/bibdesk/BDSKACMDLParser.m
===================================================================
--- trunk/bibdesk/BDSKACMDLParser.m     2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKACMDLParser.m     2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,6 @@
  */
 
 #import "BDSKACMDLParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
 
 
@@ -45,8 +44,8 @@
 
 + (NSString *)citationNodeXPath { return 
@"./head/meta[@name='citation_abstract_html_url']/@content"; }
 
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
-    NSString *nodeString = [[NSURL URLWithString:[node 
stringValueOfAttribute:@"content"]] query];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+    NSString *nodeString = [[NSURL URLWithString:[(DOMElement *)node 
getAttribute:@"content"]] query];
 
     AGRegex *doiRegex = [AGRegex regexWithPattern:@"^id=([0-9]+)\\.([0-9]+)$"];
     AGRegexMatch *match = [doiRegex findInString:nodeString];

Modified: trunk/bibdesk/BDSKArxivParser.m
===================================================================
--- trunk/bibdesk/BDSKArxivParser.m     2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKArxivParser.m     2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,7 @@
 #import "BDSKArxivParser.h"
 #import "BibItem.h"
 #import "BDSKLinkedFile.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSURL_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
 
@@ -46,7 +46,7 @@
 
 @implementation BDSKArxivParser
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
     // !!! other countries end up with e.g. fr.arxiv.org; checking for 
scholar.arxiv.com may fail in those cases
     if ([url hasDomain:@"arxiv.org"] == NO)
@@ -63,8 +63,7 @@
     else
         return NO;
     
-    NSError *error = nil;    
-    NSInteger nodecount = [[[xmlDocument rootElement] 
nodesForXPath:containsArxivLinkNode error:&error] count];
+    NSInteger nodecount = [[[domDocument documentElement] 
nodesForXPath:containsArxivLinkNode] count];
     
     return nodecount > 0;
 }
@@ -96,10 +95,10 @@
         pdfURLNodePath = 
@"../div[@class='extra-services']/div[@class='full-text']/ul/li/a[contains(text(),'PDF')]";
         doiNodePath = @"./div[@class='metatable']/table//td[@class='tablecell 
doi']/a";
         
-        titleNodePath = @"./h1[contains(concat(' ',normalize-space(@class),' 
'),' title ')]/text()";
+        titleNodePath = @"./h1[contains(concat(' ',normalize-space(@class),' 
'),' title ')]/span/following-sibling::text()";
         authorsNodePath = @"./div[@class='authors']/a";
         journalNodePath = 
@"./div[@class='metatable']/table//td[@class='tablecell jref']";
-        abstractNodePath = @"./blockquote[contains(concat(' 
',normalize-space(@class),' '),' abstract ')]/text()";
+        abstractNodePath = @"./blockquote[contains(concat(' 
',normalize-space(@class),' '),' abstract ')]";
     } else if (isSearch) {
         arxivSearchResultNodePath = 
@"./body//ol[@class='breathe-horizontal']/li[@class='arxiv-result']";
         
@@ -112,7 +111,7 @@
         titleNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' title ')]";
         authorsNodePath = @"./p[@class='authors']/a";
         journalNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' comments ')]/span[text()='Journal ref:']/following-sibling::text()";
-        abstractNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),' 
abstract-full ')]/text()";
+        abstractNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),' 
abstract-full ')]";
     } else if (isList) {
         arxivSearchResultNodePath = @"./body/div[@id='content']/div/dl/dt";
         
@@ -120,11 +119,11 @@
         arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
         
         pdfURLNodePath = @"./a[contains(text(),'pdf')]";
-        doiNodePath = @"./div[@class='list-doi']/text()";
+        doiNodePath = 
@"./div[@class='list-doi']/span/following-sibling::text()";
         
-        titleNodePath = @"./div[contains(concat(' ',normalize-space(@class),' 
'),' list-title ')]/text()";
+        titleNodePath = @"./div[contains(concat(' ',normalize-space(@class),' 
'),' list-title ')]/span/following-sibling::text()";
         authorsNodePath = @"./div[@class='list-authors']/a";
-        journalNodePath = @"./div[@class='list-journal-ref']/text()";
+        journalNodePath = 
@"./div[@class='list-journal-ref']/span/following-sibling::text()";
         abstractNodePath = @"./p";
     } else {
         return  nil;
@@ -144,20 +143,18 @@
     
     // fetch the arxiv search results
     NSArray *arxivSearchResults = nil;
-    NSXMLElement *rootElement = [[self xmlDocument] rootElement];
+    DOMElement *rootElement = [[self domDocument] documentElement];
     
-    arxivSearchResults = [rootElement nodesForXPath:arxivSearchResultNodePath 
error:outError];
+    arxivSearchResults = [rootElement nodesForXPath:arxivSearchResultNodePath];
     
     BOOL addLinkedFiles = NO == [[NSUserDefaults standardUserDefaults] 
boolForKey:BDSKUseLocalUrlAndUrlKey];
     
     NSMutableArray *items = [NSMutableArray arrayWithCapacity:0];
     
-    for (NSXMLNode *arxivSearchResult in arxivSearchResults) {
+    for (DOMNode *arxivSearchResult in arxivSearchResults) {
         
         // fetch the arxiv links
-        NSError *error = nil;
-        NSArray *nodes = [arxivSearchResult nodesForXPath:arxivLinkNodePath
-                                                             error:&error];
+        NSArray *nodes = [arxivSearchResult nodesForXPath:arxivLinkNodePath];
         
         if (1 != [nodes count]) {
             // If arXiv ever start providing multiple alternative bibtex links 
for a
@@ -166,8 +163,8 @@
             continue;
         }
         
-        NSXMLNode *arxivNode = [nodes objectAtIndex:0];
-        NSXMLNode *node;
+        DOMNode *arxivNode = [nodes objectAtIndex:0];
+        DOMNode *node;
         
         NSMutableDictionary *pubFields = [NSMutableDictionary dictionary];
         NSMutableArray *pubFiles = [NSMutableArray array];
@@ -174,16 +171,15 @@
         NSString *string = nil;
         
         // search for arXiv ID
-        nodes = [arxivNode nodesForXPath:arxivIDNodePath error:&error];
+        nodes = [arxivNode nodesForXPath:arxivIDNodePath];
         if (1 == [nodes count]) {
             node = [nodes firstObject];
             if ((string = [node stringValue])) {
-                string = [string 
stringByRemovingSurroundingWhitespaceAndNewlines];
                 if ([string hasCaseInsensitivePrefix:@"arXiv:"])
                     string = [string substringFromIndex:6];
                 [pubFields setValue:string forKey:@"Eprint"];
             }
-            if ((string = [node stringValueOfAttribute:@"href"])) {
+            if ((string = [(DOMElement *)node getAttribute:@"href"])) {
                 // fix relative urls
                 if (NO == [string containsString:@"://"])
                     string = [[NSURL URLWithString:string relativeToURL:url] 
absoluteString];
@@ -197,10 +193,10 @@
             arxivNode = arxivSearchResult;
         
         // search for PDF
-        nodes = [arxivNode nodesForXPath:pdfURLNodePath error:&error];
+        nodes = [arxivNode nodesForXPath:pdfURLNodePath];
         if (1 == [nodes count]) {
             // successfully found the result PDF url
-            if ((string = [[nodes firstObject] 
stringValueOfAttribute:@"href"])) {
+            if ((string = [[nodes firstObject] getAttribute:@"href"])) {
                 // fix relative urls
                 if (NO == [string containsString:@"://"])
                     string = [[NSURL URLWithString:string relativeToURL:url] 
absoluteString];
@@ -211,7 +207,7 @@
         }
         
         // search for DOI
-        nodes = [arxivNode nodesForXPath:doiNodePath error:&error];
+        nodes = [arxivNode nodesForXPath:doiNodePath];
         if (1 == [nodes count]) {
             // successfully found the result DOI
             if ((string = [[nodes firstObject] stringValue])) {
@@ -219,33 +215,32 @@
             }
         }
         
-        if (isList)
-            arxivNode = [[[arxivSearchResult nextSibling] children] 
firstObject];
-        else
+        if (isList) {
+            arxivNode = [[arxivSearchResult 
nodesForXPath:@"./following-sibling::dd/div[@class='meta']"] firstObject];
+        } else {
             arxivNode = arxivSearchResult;
+        }
         
         // search for title
-        nodes = [arxivNode nodesForXPath:titleNodePath error:&error];
+        nodes = [arxivNode nodesForXPath:titleNodePath];
         if (1 == [nodes count]) {
             if ((string = [[nodes firstObject] stringValue])) {
-                string = [string 
stringByRemovingSurroundingWhitespaceAndNewlines];
                 [pubFields setValue:string forKey:BDSKTitleString];
             }
         }
         
         // search for authors
-        nodes = [arxivNode nodesForXPath:authorsNodePath error:&error];
+        nodes = [arxivNode nodesForXPath:authorsNodePath];
         if (0 < [nodes count]) {
-            if ((string = [[nodes 
valueForKeyPath:@"stringValue.stringByRemovingSurroundingWhitespaceAndNewlines"]
 componentsJoinedByString:@" and "])) {
+            if ((string = [[nodes valueForKeyPath:@"stringValue"] 
componentsJoinedByString:@" and "])) {
                 [pubFields setValue:string forKey:BDSKAuthorString];
             }
         }
         
         // search for journal ref
-        nodes = [arxivNode nodesForXPath:journalNodePath error:&error];
+        nodes = [arxivNode nodesForXPath:journalNodePath];
         if (1 == [nodes count]) {
             if ((string = [[nodes firstObject] stringValue])) {
-                string = [string 
stringByRemovingSurroundingWhitespaceAndNewlines];
                 // try to get full journal ref components, as "Journal Volume 
(Year) Pages"
                 AGRegexMatch *match = [journalRegex1 findInString:string];
                 if ([match groupAtIndex:0]) {
@@ -273,10 +268,13 @@
         }
         
         // search for abstract
-        nodes = [arxivNode nodesForXPath:abstractNodePath error:&error];
+        nodes = [arxivNode nodesForXPath:abstractNodePath];
         if (1 == [nodes count]) {
-            if ((string = [[nodes firstObject] stringValue])) {
-                string = [string 
stringByRemovingSurroundingWhitespaceAndNewlines];
+            if ((string = [[nodes firstObject] stringValuePreservingBreaks])) {
+                if (isAbstract && [string hasPrefix:@"Abstract: "])
+                    string = [string substringFromIndex:10];
+                if (isSearch && [string hasSuffix:@"\u25B3 Less"])
+                    string = [[string substringToIndex:[string length] - 6] 
stringByRemovingSurroundingWhitespaceAndNewlines];
                 [pubFields setValue:string forKey:BDSKAbstractString];
             }
         }

Modified: trunk/bibdesk/BDSKAsynchronousWebParser.h
===================================================================
--- trunk/bibdesk/BDSKAsynchronousWebParser.h   2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/BDSKAsynchronousWebParser.h   2018-08-22 14:19:07 UTC (rev 
22509)
@@ -59,7 +59,7 @@
 // Get the URL string pointing to the bibTeX data from the node found by 
citationNodeXPath
 // Can be a relative URL, which is completed using the URL of the web page
 // By default returns the href attribute of the node
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node;
++ (NSString *)citationURLStringFromNode:(DOMNode *)node;
 
 // Subclasses can also override -itemsReturningError: adding downloads 
themselves using the following
 - (void)addDownloadWithRequest:(NSURLRequest *)request 
contextInfo:(id)contextInfo;

Modified: trunk/bibdesk/BDSKAsynchronousWebParser.m
===================================================================
--- trunk/bibdesk/BDSKAsynchronousWebParser.m   2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/BDSKAsynchronousWebParser.m   2018-08-22 14:19:07 UTC (rev 
22509)
@@ -40,7 +40,7 @@
 #import "BibItem.h"
 #import "BDSKBibTeXParser.h"
 #import "NSError_BDSKExtensions.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSURL_BDSKExtensions.h"
 
 
@@ -62,14 +62,13 @@
     return [self finishedStarting] && [downloads count] == 0;
 }
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
     NSString *host = [[[NSURL URLWithString:[self address]] host] 
lowercaseString];
     
     if ([url hasDomain:host] == NO)
         return NO;
     
-    NSError *error = nil;
-    NSArray *nodes = [[xmlDocument rootElement] nodesForXPath:[self 
citationNodeXPath] error:&error];
+    NSArray *nodes = [[domDocument documentElement] nodesForXPath:[self 
citationNodeXPath]];
     
     if ([nodes count] == 0)
         return NO;
@@ -80,10 +79,10 @@
 - (NSArray *)itemsReturningError:(NSError **)outError {
     
     NSString *bibtexNodePath = [[self class] citationNodeXPath];
-    NSArray *bibtexNodes = [[[self xmlDocument] rootElement] 
nodesForXPath:bibtexNodePath error:outError];
+    NSArray *bibtexNodes = [[[self domDocument] documentElement] 
nodesForXPath:bibtexNodePath];
     NSMutableArray *bibtexURLStrings = [NSMutableArray array];
     
-    for (NSXMLNode *bibtexNode in bibtexNodes) {
+    for (DOMNode *bibtexNode in bibtexNodes) {
         NSString *bibtexURLString = [[self class] 
citationURLStringFromNode:bibtexNode];
         if (bibtexURLString != nil && [bibtexURLStrings 
containsObject:bibtexURLString] == NO)
             [bibtexURLStrings addObject:bibtexURLString];
@@ -106,8 +105,8 @@
 
 + (NSString *)citationNodeXPath { return @""; }
 
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
-    return [node stringValueOfAttribute:@"href"];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+    return [(DOMElement *)node getAttribute:@"href"];
 }
 
 - (void)downloadDidFinish:(BDSKCitationDownload *)download {

Modified: trunk/bibdesk/BDSKBibTeXWebParser.m
===================================================================
--- trunk/bibdesk/BDSKBibTeXWebParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKBibTeXWebParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,7 @@
 #import "BDSKBibTeXWebParser.h"
 #import "BDSKBibTeXParser.h"
 #import "BibItem.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSError_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
 
@@ -46,9 +46,9 @@
 
 @implementation BDSKBibTeXWebParser
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
-    NSString *text = [xmlDocument textStringValue];
+    NSString *text = [[domDocument body] textContent];
        AGRegex *bibtexRegex = [AGRegex regexWithPattern:@"@[[:alpha:]]+[ 
\\t]*[{(]"];
        
     return nil != [bibtexRegex findInString:text];
@@ -59,7 +59,7 @@
     
     NSMutableArray *items = [NSMutableArray array];
     
-    NSString *text = [[self xmlDocument] textStringValue];
+    NSString *text = [[[self domDocument] body] textContent];
 
     AGRegex *bibtexRegex = [AGRegex regexWithPattern:@"@[[:alpha:]]+[ 
\\t]*[{(]"];
     

Modified: trunk/bibdesk/BDSKCOinSParser.m
===================================================================
--- trunk/bibdesk/BDSKCOinSParser.m     2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKCOinSParser.m     2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,7 @@
 
 #import "BDSKCOinSParser.h"
 #import <AGRegex/AGRegex.h>
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "BDSKLinkedFile.h"
 #import "NSString_BDSKExtensions.h"
 
@@ -64,12 +64,12 @@
 
 @implementation BDSKCOinSParser
 
+static NSString *hasCOinSNodesXPath = @"./body//span[@class='Z3988' and 
string-length(@title)!=0]";
 
 // Claim that we can parse the document if its markup contains the string 
Z3988.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
        
-    NSError *error;
-    NSArray *nodes = [[xmlDocument rootElement] 
nodesForXPath:@"./body//span[@class='Z3988']" error:&error];
+    NSArray *nodes = [[domDocument documentElement] 
nodesForXPath:hasCOinSNodesXPath];
     
     return [nodes count] > 0;
 }
@@ -106,8 +106,8 @@
        }
        
        
-       NSArray * components = [inputString 
componentsSeparatedByString:@"&amp;"];
-       if ([components count] < 2 ) { return nil; }
+    NSArray * components = [inputString componentsSeparatedByString:@"&"];
+    if ([components count] < 2 ) { return nil; }
 
     NSMutableDictionary *fieldsDict = [NSMutableDictionary dictionary];
     NSMutableArray *files = [NSMutableArray array];
@@ -321,15 +321,14 @@
 // Process the document. 
 - (NSArray *)itemsReturningError:(NSError **)outError {
 
-    NSArray *nodes = [[[self xmlDocument] rootElement] 
nodesForXPath:@"./body//span[@class='Z3988']" error:outError];
+    NSArray *nodes = [[[self domDocument] documentElement] 
nodesForXPath:hasCOinSNodesXPath];
     
     NSMutableArray *items = [NSMutableArray arrayWithCapacity:[nodes count]];
     
-    for (NSXMLNode *node in nodes) {
+    for (DOMNode *node in nodes) {
         NSString *title;
         BibItem *bibItem;
-        if ([node kind] == NSXMLElementKind &&
-            (title = [[(NSXMLElement *)node attributeForName:@"title"] 
XMLString]) &&
+        if ((title = [(DOMElement *)node getAttribute:@"title"]) &&
             (bibItem = [BDSKCOinSParser parseCOinSString:title]))
             [items addObject:bibItem];
     }

Modified: trunk/bibdesk/BDSKCiteULikeParser.m
===================================================================
--- trunk/bibdesk/BDSKCiteULikeParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKCiteULikeParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,14 +37,13 @@
 
 #import "BDSKCiteULikeParser.h"
 #import "BibItem.h"
-#import "NSXMLNode_BDSKExtensions.h"
 
 @implementation BDSKCiteULikeParser
 
 + (NSString *)citationNodeXPath { return 
@"./body//div[@id='export']/table//td/a[text()='BibTeX']"; }
 
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
-    NSMutableString *urlString = [[[node stringValueOfAttribute:@"href"] 
mutableCopy] autorelease];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+    NSMutableString *urlString = [[[(DOMElement *)node getAttribute:@"href"] 
mutableCopy] autorelease];
     
     [urlString replaceOccurrencesOfString:@"/bibtex_options/" 
withString:@"/bibtex/" options:NSCaseInsensitiveSearch range:NSMakeRange(0, 
[urlString length])];
     

Modified: trunk/bibdesk/BDSKDOIWebParser.m
===================================================================
--- trunk/bibdesk/BDSKDOIWebParser.m    2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKDOIWebParser.m    2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,7 @@
  */
 
 #import "BDSKDOIWebParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSURL_BDSKExtensions.h"
 #import "NSString_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
@@ -45,25 +45,29 @@
 
 @implementation BDSKDOIWebParser
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url {
-    NSXMLNode *rootElement = [xmlDocument rootElement];
-    NSError *error;
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
+    DOMNode *rootElement = [domDocument documentElement];
     NSString *doiXPath;
     NSArray *nodes;
     
-    doiXPath = @"./head/meta[contains(' citation_doi doi prism.doi 
dc.identifier ', concat(' ',lower-case(@name),' ') and 
(starts-with(lower-case(@content),'doi:') or starts-with(@content,'10.'))][1]";
-    nodes = [rootElement nodesForXPath:doiXPath error:&error];
+    doiXPath = 
@"./head/meta[starts-with(translate(@content,'DOI','doi'),'doi:') or 
starts-with(@content,'10.')]";
+    nodes = [rootElement nodesForXPath:doiXPath];
     
-    if ([nodes count] > 0)
-        return YES;
+    if ([nodes count] > 0) {
+        NSSet *names = [NSSet setWithObjects:@"citation_doi", @"doi", 
@"prism.doi", @"dc.identifier", nil];
+        for (DOMNode *node in nodes) {
+            if ([names containsObject:[[node nodeName] lowercaseString]])
+                return YES;
+        }
+    }
     
     doiXPath = @"./body//a[starts-with(@href,'https://doi.org/') or 
starts-with(@href,'http://dx.doi.org/')]";
-    nodes = [rootElement nodesForXPath:doiXPath error:&error];
+    nodes = [rootElement nodesForXPath:doiXPath];
     
     if ([nodes count] > 0)
         return YES;
     
-    NSString *text = [xmlDocument textStringValue];
+    NSString *text = [[domDocument body] textContent];
     AGRegex *doiRegex = [AGRegex regexWithPattern:@"(doi:[ 
\\t]*|https?://(dx\\.)?doi\\.org/)10\\.[0-9]{4,}(\\.[0-9]+)*/\\S+" 
options:AGRegexCaseInsensitive];
     
     return nil != [doiRegex findInString:text];
@@ -72,32 +76,39 @@
 - (NSArray *)itemsReturningError:(NSError **)outError {
     NSMutableArray *dois = [NSMutableArray array];
     NSURL *baseURL = [NSURL URLWithString:@"https://doi.org/"];
-    NSXMLNode *rootElement = [[self xmlDocument] rootElement];
+    DOMNode *rootElement = [[self domDocument] documentElement];
     NSString *doiXPath;
     NSArray *nodes = nil;
-    NSError *error = nil;
     
     AGRegex *doiRegex = [AGRegex 
regexWithPattern:@"^(doi:|https?://(dx\\.)?doi\\.org/)?(10\\.[0-9]{4,}(\\.[0-9]+)*/\\S+)$"
 options:AGRegexCaseInsensitive];
     AGRegexMatch *match;
     NSString *doi;
     
-    doiXPath = @"./head/meta[contains(' citation_doi doi prism.doi 
dc.identifier ', concat(' ', lower-case(@name), ' ') and 
(starts-with(lower-case(@content),'doi:') or starts-with(@content,'10.'))][1]";
-    nodes = [rootElement nodesForXPath:doiXPath error:&error];
+    doiXPath = 
@"./head/meta[starts-with(translate(@content,'DOI','doi'),'doi:') or 
starts-with(@content,'10.')]";
+    nodes = [rootElement nodesForXPath:doiXPath];
     
     if ([nodes count] > 0) {
-        doi = [[nodes firstObject] stringValueOfAttribute:@"content"];
-        if (doi && (match = [doiRegex findInString:doi])) {
-            doi = [match groupAtIndex:3];
-            if ([[match groupAtIndex:1] hasPrefix:@"http"] == NO)
-                doi = [doi stringByAddingPercentEscapes];
-            [dois addObject:doi];
+        NSSet *names = [NSSet setWithObjects:@"citation_doi", @"doi", 
@"prism.doi", @"dc.identifier", nil];
+        for (DOMElement *node in nodes) {
+            if ([names containsObject:[[node nodeName] lowercaseString]]) {
+                doi = [node getAttribute:@"content"];
+                if (doi && (match = [doiRegex findInString:doi])) {
+                    doi = [match groupAtIndex:3];
+                    if ([[match groupAtIndex:1] hasPrefix:@"http"] == NO)
+                        doi = [doi stringByAddingPercentEscapes];
+                    [dois addObject:doi];
+                }
+                break;
+            }
         }
-    } else {
+    }
+    
+    if ([dois count] == 0) {
         doiXPath = @"./body//a[starts-with(@href,'https://doi.org/') or 
starts-with(@href,'http://dx.doi.org/')]";
-        nodes = [rootElement nodesForXPath:doiXPath error:&error];
+        nodes = [rootElement nodesForXPath:doiXPath];
         
-        for (NSXMLNode *node in nodes) {
-            doi = [node stringValueOfAttribute:@"href"];
+        for (DOMElement *node in nodes) {
+            doi = [node getAttribute:@"href"];
             if (doi && (match = [doiRegex findInString:doi])) {
                 doi = [[match groupAtIndex:3] 
stringByTrimmingCharactersInSet:[NSCharacterSet punctuationCharacterSet]];
                 if ([dois containsObject:doi] == NO)
@@ -107,7 +118,7 @@
     }
     
     if ([dois count] == 0) {
-        NSString *text = [[self xmlDocument] textStringValue];
+        NSString *text = [[[self domDocument] body] textContent];
         
         doiRegex = [AGRegex regexWithPattern:@"(doi:[ 
\\t]*|https?://(dx\\.)?doi\\.org/)(10\\.[0-9]{4,}(\\.[0-9]+)*/\\S+)" 
options:AGRegexCaseInsensitive];
         
@@ -128,9 +139,6 @@
         [self addDownloadWithRequest:request contextInfo:nil];
     }
     
-    if ([dois count] == 0 && outError)
-        *outError = error;
-    
     return nil;
 }
 

Modified: trunk/bibdesk/BDSKGoogleScholarParser.m
===================================================================
--- trunk/bibdesk/BDSKGoogleScholarParser.m     2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/BDSKGoogleScholarParser.m     2018-08-22 14:19:07 UTC (rev 
22509)
@@ -36,6 +36,7 @@
  */
 
 #import "BDSKGoogleScholarParser.h"
+#import "DOMNode_BDSKExtensions.h"
 
 #define BDSKDisableGoogleScholarListParsingKey 
@"BDSKDisableGoogleScholarListParsing"
 
@@ -43,7 +44,7 @@
 
 + (NSString *)citationNodeXPath { return 
@"./body//a[contains(text(),'BibTeX')]"; }
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     // !!! other countries end up with e.g. scholar.google.be; checking for 
scholar.google.com may fail in those cases
     // also some sites access google scholar via an ezproxy, so the suffix 
could be quite complex
     if (nil == [url host] || NO == [[[url host] lowercaseString] 
hasPrefix:@"scholar.google."] || 
@@ -51,8 +52,7 @@
         return NO;
     }
     
-    NSError *error = nil;
-    NSUInteger nodecount = [[[xmlDocument rootElement] nodesForXPath:[self 
citationNodeXPath] error:&error] count];
+    NSUInteger nodecount = [[[domDocument documentElement] nodesForXPath:[self 
citationNodeXPath]] count];
 
     return nodecount > 0;
 }

Modified: trunk/bibdesk/BDSKHCiteParser.m
===================================================================
--- trunk/bibdesk/BDSKHCiteParser.m     2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKHCiteParser.m     2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,23 +39,24 @@
 #import <WebKit/WebKit.h>
 #import "BibItem.h"
 #import "BDSKTypeManager.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 
 
 @interface BDSKHCiteParser (Private)
-- (NSDate *)dateFromNode:(NSXMLNode *)node;
-- (NSString *)BTAuthorStringFromVCardNode:(NSXMLNode *)node;
-- (NSMutableDictionary *)dictionaryFromCitationNode:(NSXMLNode *)citationNode;
+- (NSDate *)dateFromNode:(DOMNode *)node;
+- (NSString *)BTAuthorStringFromVCardNode:(DOMNode *)node;
+- (NSMutableDictionary *)dictionaryFromCitationNode:(DOMNode *)citationNode 
isContainer:(BOOL)isContainer;
 
 @end
 
-
 @implementation BDSKHCiteParser
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
-    NSError *error = nil;
-    return [[[xmlDocument rootElement] 
descendantOrSelfNodesWithClassName:@"hcite" error:&error] count] > 0;
+    NSString *hciteXpath = @".//*[contains(concat(' ',normalize-space(@class), 
' '),' hcite ') and not(contains(concat(' ', normalize-space(@class), ' '),' 
container '))]";
+    NSArray *mainNodes = [[domDocument documentElement] 
nodesForXPath:hciteXpath];
+    
+    return [mainNodes count] > 0;
 }
 
 - (NSArray *)itemsReturningError:(NSError **)outError {
@@ -62,17 +63,16 @@
 
     NSMutableArray *items = [NSMutableArray arrayWithCapacity:0];
     
-    NSArray *mainNodes = [[[self xmlDocument] rootElement] 
descendantOrSelfNodesWithClassName:@"hcite" error:outError];
+    // get hcite elements, avoid creating top-level refs from containers:
+    NSString *hciteXpath = @".//*[contains(concat(' ',normalize-space(@class), 
' '),' hcite ') and not(contains(concat(' ', normalize-space(@class), ' '),' 
container '))]";
+    NSArray *mainNodes = [[[self domDocument] documentElement] 
nodesForXPath:hciteXpath];
     
     
-    for (NSXMLNode *obj in mainNodes) {
+    for (DOMNode *obj in mainNodes) {
         NSMutableDictionary *rd = nil;
         NSString *type = nil;
         
-        // avoid creating top-level refs from containers:
-        if([[obj classNames] containsObject:@"container"]) continue;
-        
-        rd = [self dictionaryFromCitationNode:obj];
+        rd = [self dictionaryFromCitationNode:obj isContainer:NO];
         type = [rd objectForKey:BDSKTypeString];
         [rd removeObjectForKey:BDSKTypeString];
         
@@ -84,22 +84,17 @@
     return items;
 }
 
-- (NSMutableDictionary *)dictionaryFromCitationNode:(NSXMLNode *)citationNode{
+- (NSMutableDictionary *)dictionaryFromCitationNode:(DOMNode *)citationNode 
isContainer:(BOOL)isContainer{
     BDSKTypeManager *typeMan = [BDSKTypeManager sharedManager];
     NSMutableDictionary *rd = [NSMutableDictionary dictionaryWithCapacity:0];
     
-    NSError *err = nil;
-    NSUInteger i = 0;
-    
     // find type but not type that's a descendant of 'container'.
-    NSArray *typeNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"type" error:&err];
+    NSArray *typeNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"type"];
     
     NSString *typeString = nil;
-    for (i = 0; i < [typeNodes count]; i++) {
-        NSXMLNode *node = [typeNodes objectAtIndex:i];
-        if(![[citationNode classNames] containsObject:@"container"] &&
-           [node hasParentWithClassName:@"container"] ) continue;
-        typeString = [node fullStringValueIfABBR];
+    for (DOMNode *node in typeNodes) {
+        if(isContainer || [node hasParentWithClassName:@"container"] == NO)
+            typeString = [node fullStringValueIfABBR];
     }
     
     if(typeString != nil){
@@ -111,187 +106,151 @@
     
     // find title node
     
-    NSArray *titleNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"title" error:&err];
+    NSArray *titleNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"title"];
     
-    for(i = 0; i < [titleNodes count]; i++){
-        NSXMLNode *node = [titleNodes objectAtIndex:i];
-        if(![[citationNode classNames] containsObject:@"container"] &&
-           [node hasParentWithClassName:@"container"]){
-            // note: todo - avoid second hasParentWithClassName by finding 
container 
-            // nodes first and caching those then checking against them here. 
(if necessary)
-            continue; // deal with this citation's container later
-        }
-        
-        [rd setObject:[node stringValue] forKey:BDSKTitleString];
+    for(DOMNode *node in titleNodes){
+        // deal with this citation's container later
+        // note: todo - avoid second hasParentWithClassName by finding 
container
+        // nodes first and caching those then checking against them here. (if 
necessary)
+        if(isContainer || [node hasParentWithClassName:@"container"] == NO)
+            [rd setObject:[node stringValue] forKey:BDSKTitleString];
     }
     
     // find authors
-
-    NSArray *authorNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"creator" error:&err];
+    
+    NSString *authorXpath = @".//*[contains(concat(' 
',normalize-space(@class), ' '),' creator ') and contains(concat(' 
',normalize-space(@class), ' '),' vcard ')]";
+    NSArray *authorNodes = [citationNode nodesForXPath:authorXpath];
     NSMutableString *BTAuthString = [NSMutableString stringWithCapacity:0];
     
-    for(i = 0; i < [authorNodes count]; i++){
-        NSXMLNode *node = [authorNodes objectAtIndex:i];
-        if (! [[node classNames] containsObject:@"vcard"]) continue;
-        
-        if(i > 0)[BTAuthString appendFormat:@" and "];
-        
+    for(DOMNode *node in authorNodes){
+        if ([BTAuthString length]) [BTAuthString appendFormat:@" and "];
         [BTAuthString appendString:[self BTAuthorStringFromVCardNode:node]];
-        
     }
     [rd setObject:BTAuthString forKey:BDSKAuthorString];
     
     // find keywords
     
-    NSArray *tagNodes = [citationNode nodesForXPath:@".//*[contains(concat(' 
', normalize-space(@rel), ' '), ' tag ')]" error:&err];
-     NSMutableString *BTKeywordString = [NSMutableString stringWithCapacity:0];
-     
-     for(i = 0; i < [tagNodes count]; i++){
-         NSXMLNode *node = [tagNodes objectAtIndex:i];
-         
-         if(i > 0)[BTKeywordString appendFormat:@"; "];
-         
-         [BTKeywordString appendString:[node stringValue]];
-         
-     }
-     [rd setObject:BTKeywordString forKey:BDSKKeywordsString];
-     
-     // find description (append multiple descriptions to avoid data loss)
-     
-     NSMutableArray *descNodes = [NSMutableArray arrayWithCapacity:0];
-     [descNodes addObjectsFromArray:[citationNode 
descendantOrSelfNodesWithClassName:@"description" error:&err]];
-     [descNodes addObjectsFromArray:[citationNode 
descendantOrSelfNodesWithClassName:@"abstract" error:&err]];
-     
-     NSMutableString *BTDescString = [NSMutableString stringWithCapacity:0];
-     
-     for(i = 0; i < [descNodes count]; i++){
-         NSXMLNode *node = [descNodes objectAtIndex:i];
-         
-         if(i > 0)[BTDescString appendFormat:@"\n"];
-         
-         [BTDescString appendString:[node stringValue]];
-         
-     }
-     [rd setObject:BTDescString forKey:BDSKAbstractString];
-     
-     
-     // find date published
-     
-     NSArray *datePublishedNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"date-published" error:&err];
-     
-     if([datePublishedNodes count] > 0) {
-         NSXMLNode *datePublishedNode = [datePublishedNodes objectAtIndex:0]; 
// Only use the first such node.
-         NSDate *datePublished = [self dateFromNode:datePublishedNode];
-         NSDateFormatter *formatter = [[NSDateFormatter alloc] init];
-         [formatter setFormatterBehavior:NSDateFormatterBehavior10_4];
-         [formatter setDateFormat:@"yyyy"];
-         [rd setObject:[formatter stringFromDate:datePublished] 
forKey:BDSKYearString];
-         [formatter setDateFormat:@"MMMM"];
-         [rd setObject:[formatter stringFromDate:datePublished] 
forKey:BDSKMonthString];
-         [formatter release];
-     }
-     
-     // find issue
-     
-     NSArray *issueNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"issue" error:&err];
-     
-     if([issueNodes count] > 0) {
-         NSXMLNode *issueNode = [issueNodes objectAtIndex:0]; // Only use the 
first such node.
+    NSArray *tagNodes = [citationNode nodesForXPath:@".//*[contains(concat(' 
', normalize-space(@rel), ' '), ' tag ')]"];
+    
+    [rd setObject:[[tagNodes valueForKey:@"stringValue"] 
componentsJoinedByString:@"; "] forKey:BDSKKeywordsString];
+    
+    // find description (append multiple descriptions to avoid data loss)
+    
+    NSMutableArray *descNodes = [NSMutableArray arrayWithCapacity:0];
+    [descNodes addObjectsFromArray:[citationNode 
descendantOrSelfNodesWithClassName:@"description"]];
+    [descNodes addObjectsFromArray:[citationNode 
descendantOrSelfNodesWithClassName:@"abstract"]];
+    
+    [rd setObject:[[descNodes valueForKey:@"stringValue"] 
componentsJoinedByString:@"\n"] forKey:BDSKAbstractString];
+    
+    // find date published
+    
+    NSArray *datePublishedNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"date-published"];
+    
+    if([datePublishedNodes count] > 0) {
+        DOMNode *datePublishedNode = [datePublishedNodes objectAtIndex:0]; // 
Only use the first such node.
+        NSDate *datePublished = [self dateFromNode:datePublishedNode];
+        NSDateFormatter *formatter = [[NSDateFormatter alloc] init];
+        [formatter setFormatterBehavior:NSDateFormatterBehavior10_4];
+        [formatter setDateFormat:@"yyyy"];
+        [rd setObject:[formatter stringFromDate:datePublished] 
forKey:BDSKYearString];
+        [formatter setDateFormat:@"MMMM"];
+        [rd setObject:[formatter stringFromDate:datePublished] 
forKey:BDSKMonthString];
+        [formatter release];
+    }
+    
+    // find issue
+    
+    NSArray *issueNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"issue"];
+    
+    if([issueNodes count] > 0) {
+        // Only use the first such node.
+        [rd setObject:[[issueNodes objectAtIndex:0] stringValue] 
forKey:@"Issue"];
+    }
+    
+    // find pages
+    
+    NSArray *pagesNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"pages"];
+    
+    if([pagesNodes count] > 0) {
+        // Only use the first such node.
+        [rd setObject:[[pagesNodes objectAtIndex:0] stringValue] 
forKey:BDSKPagesString];
+    }
+    
+    // find URI
+    
+    NSArray *URINodes = [citationNode 
descendantOrSelfNodesWithClassName:@"uri"];
+    
+    if([URINodes count] > 0) {
+        DOMElement *URINode = [URINodes objectAtIndex:0]; // Only use the 
first such node.
+        NSString *URIString = nil;
+        
+        if([[URINode nodeName] isCaseInsensitiveEqual:@"a"]){
+            URIString = [URINode getAttribute:@"href"];
+        }else{
+            URIString = [URINode fullStringValueIfABBR];
+        }
+        
+        if([URIString hasCaseInsensitivePrefix:@"http://"] || [URIString 
hasCaseInsensitivePrefix:@"https://"]){
+            [rd setObject:URIString forKey:BDSKUrlString];
+        } else {
+            [rd setObject:URIString forKey:@"Uri"];
+            
+        }
+    }
+    
+    // get container info:
+    // *** NOTE: should do this last, to avoid overwriting data
+    
+    NSString *containerXpath = @".//*[contains(concat(' 
',normalize-space(@class), ' '),' hcite ') and contains(concat(' ', 
normalize-space(@class), ' '),' container ')]";
+    NSArray *containerNodes = [citationNode nodesForXPath:containerXpath];
+    
+    if([containerNodes count] > 0) {
+        DOMNode *containerNode = [containerNodes objectAtIndex:0];
 
-         [rd setObject:[issueNode stringValue] forKey:@"Issue"];
-     }     
-     
-     // find pages
-     
-     NSArray *pagesNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"pages" error:&err];
-     
-     if([pagesNodes count] > 0) {
-         NSXMLNode *pagesNode = [pagesNodes objectAtIndex:0]; // Only use the 
first such node.
-         
-         [rd setObject:[pagesNode stringValue] forKey:BDSKPagesString];
-     }  
-     
-     // find URI
-     
-     NSArray *URINodes = [citationNode 
descendantOrSelfNodesWithClassName:@"uri" error:&err];
-     
-     if([URINodes count] > 0) {
-         NSXMLNode *URINode = [URINodes objectAtIndex:0]; // Only use the 
first such node.
-         NSString *URIString = nil;
-         
-         if([[URINode name] isEqualToString:@"a"]){
-             URIString = [URINode stringValueOfAttribute:@"href"];
-         }else{
-             URIString = [URINode fullStringValueIfABBR];
-         }
-         
-         if([URIString hasCaseInsensitivePrefix:@"http://"] || [URIString 
hasCaseInsensitivePrefix:@"https://"]){
-             [rd setObject:URIString forKey:BDSKUrlString];
-         } else {
-             [rd setObject:URIString forKey:@"Uri"];
+        NSString *citationType = [rd objectForKey:BDSKTypeString];
 
-         }
-     }  
-     
-     // get container info: 
-     // *** NOTE: should do this last, to avoid overwriting data
-     
-     NSArray *containerNodes = [citationNode 
descendantOrSelfNodesWithClassName:@"container"
-                                                                          
error:&err];
-     
-     if([containerNodes count] > 0) {
-         NSXMLNode *containerNode = [containerNodes objectAtIndex:0];
-         
-         if([[containerNode classNames] containsObject:@"hcite"]){
-             NSString *citationType = [rd objectForKey:BDSKTypeString];
-             
-             NSMutableDictionary *containerDict = [NSMutableDictionary 
dictionaryWithDictionary:[self dictionaryFromCitationNode:containerNode]];
-             NSString *containerTitle = [containerDict 
objectForKey:BDSKTitleString];
-             NSString *containerType = [containerDict 
objectForKey:BDSKTypeString];
-             
-             if(containerType != nil && containerTitle != nil){
-                 // refine type based on container type
-                 if([citationType isEqualToString:@"misc"]){
-                     if([containerType isEqualToString:@"journal"]){
-                         [rd setObject:BDSKArticleString 
forKey:BDSKTypeString];
-                     }else if([containerType isEqualToString:@"proceedings"]){
-                         [rd setObject:BDSKInproceedingsString 
forKey:BDSKTypeString];
-                     }
+        NSMutableDictionary *containerDict = [NSMutableDictionary 
dictionaryWithDictionary:[self dictionaryFromCitationNode:containerNode 
isContainer:YES]];
+        NSString *containerTitle = [containerDict 
objectForKey:BDSKTitleString];
+        NSString *containerType = [containerDict objectForKey:BDSKTypeString];
+
+        if(containerType != nil && containerTitle != nil){
+            // refine type based on container type
+            if([citationType isEqualToString:@"misc"]){
+                if([containerType isEqualToString:@"journal"]){
+                    [rd setObject:BDSKArticleString forKey:BDSKTypeString];
+                }else if([containerType isEqualToString:@"proceedings"]){
+                    [rd setObject:BDSKInproceedingsString 
forKey:BDSKTypeString];
+                }
+            }
             
-                 }
-
-                 // refresh:
-                 citationType = [rd objectForKey:BDSKTypeString];
-                 
-                 if([citationType isEqualToString:BDSKArticleString]){
-                     [rd setObject:containerTitle forKey:BDSKJournalString];
-                 }else if([citationType 
isEqualToString:BDSKIncollectionString] ||
-                          [citationType 
isEqualToString:BDSKInproceedingsString]){
-                     [rd setObject:containerTitle forKey:BDSKBooktitleString];
-                 }else if([citationType isEqualToString:BDSKInbookString]){
-                     // TODO: this case may need some tweaking
-                     [rd setObject:[rd objectForKey:BDSKTitleString] 
forKey:BDSKChapterString];
-                     [rd setObject:containerTitle forKey:BDSKTitleString];
-                 }else{
-                     [rd setObject:containerTitle forKey:BDSKBooktitleString];
-                 }
-             }
-             // Containers have more info than just title and type:
-             // TODO: do we only dump it in or do we need to do more?
-             [containerDict removeObjectsForKeys:[rd allKeys]];
-             [rd addEntriesFromDictionary:containerDict];
-         }
-         
+            // refresh:
+            citationType = [rd objectForKey:BDSKTypeString];
+            
+            if([citationType isEqualToString:BDSKArticleString]){
+                [rd setObject:containerTitle forKey:BDSKJournalString];
+            }else if([citationType isEqualToString:BDSKIncollectionString] || 
[citationType isEqualToString:BDSKInproceedingsString]){
+                [rd setObject:containerTitle forKey:BDSKBooktitleString];
+            }else if([citationType isEqualToString:BDSKInbookString]){
+                // TODO: this case may need some tweaking
+                [rd setObject:[rd objectForKey:BDSKTitleString] 
forKey:BDSKChapterString];
+                [rd setObject:containerTitle forKey:BDSKTitleString];
+            }else{
+                [rd setObject:containerTitle forKey:BDSKBooktitleString];
+            }
+        }
+        // Containers have more info than just title and type:
+        // TODO: do we only dump it in or do we need to do more?
+        [containerDict removeObjectsForKeys:[rd allKeys]];
+        [rd addEntriesFromDictionary:containerDict];
+        
      }
      
      return rd;
 }
 
-- (NSString *)BTAuthorStringFromVCardNode:(NSXMLNode *)node{
-    NSError *err;
-    
+- (NSString *)BTAuthorStringFromVCardNode:(DOMNode *)node{
     // note: may eventually need to do more than just look at fn and abbr.
-    NSArray *fnNodes = [node descendantOrSelfNodesWithClassName:@"fn" 
error:&err];
+    NSArray *fnNodes = [node descendantOrSelfNodesWithClassName:@"fn"];
     
     if([fnNodes count] < 1) return @"";
     
@@ -298,7 +257,7 @@
     return [[fnNodes objectAtIndex:0] fullStringValueIfABBR];
 }
 
-- (NSDate *)dateFromNode:(NSXMLNode *)node{
+- (NSDate *)dateFromNode:(DOMNode *)node{
     
     NSString *fullString = [node fullStringValueIfABBR];
     NSDate *d;

Modified: trunk/bibdesk/BDSKHubmedParser.m
===================================================================
--- trunk/bibdesk/BDSKHubmedParser.m    2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKHubmedParser.m    2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,12 +39,11 @@
 #import <WebKit/WebKit.h>
 #import "BibItem.h"
 #import <AGRegex/AGRegex.h>
-#import "NSXMLNode_BDSKExtensions.h"
 #import "NSString_BDSKExtensions.h"
 
 @implementation BDSKHubmedParser
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
     if ([url host] == nil || [[url host] 
isCaseInsensitiveEqual:@"www.hubmed.org"] == NO || 
         [[url path] isCaseInsensitiveEqual:@"/display.cgi"] == NO){

Modified: trunk/bibdesk/BDSKIACRParser.m
===================================================================
--- trunk/bibdesk/BDSKIACRParser.m      2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKIACRParser.m      2018-08-22 14:19:07 UTC (rev 22509)
@@ -38,7 +38,7 @@
 
 #import "BDSKIACRParser.h"
 #import "BibItem.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSURL_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
 
@@ -45,7 +45,7 @@
 
 @implementation BDSKIACRParser
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
     if ([url hasDomain:@"eprint.iacr.org"] == NO)
         return NO;
@@ -75,30 +75,42 @@
     
        // construct the source item(s) to parse
        NSArray *sources = nil;
-    NSXMLElement *rootElement = [[self xmlDocument] rootElement];
+    DOMElement *rootElement = [[self domDocument] documentElement];
     if (isSearch)
-        sources = [rootElement nodesForXPath:@"./body//dt" error:outError];
+        sources = [rootElement nodesForXPath:@"./body//dt"];
     else
         sources = [NSArray arrayWithObjects:rootElement, nil];
        
-    for (NSXMLNode *xmlNode in sources) {
+    for (DOMNode *node in sources) {
                
                NSMutableDictionary *pubFields = [NSMutableDictionary 
dictionary];
                NSArray *filesArray = nil;
-        NSString *pathToSearch;
+        NSString *pathToSearch = nil;
+        NSArray *nodes;
+        NSString *string;
         
         if (isSearch) {
             // set title
-            [xmlNode searchXPath:@"following-sibling::dd/b" addTo:pubFields 
forKey:BDSKTitleString];
+            nodes = [node nodesForXPath:@"following-sibling::dd/b"];
+            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+                [pubFields setObject:string forKey:BDSKTitleString];
+            nodes = [node 
nodesForXPath:@"following-sibling::dd[position()=2]/em"];
             // set authors
-            [xmlNode searchXPath:@"following-sibling::dd[position()=2]/em" 
addTo:pubFields forKey:BDSKAuthorString];
+            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+                [pubFields setObject:string forKey:BDSKAuthorString];
+            nodes = [node nodesForXPath:@".//a/@href"];
             // to get year and report number
-            pathToSearch = [xmlNode searchXPath:@".//a/@href" addTo:nil 
forKey:nil];
+            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+                pathToSearch = string;
         } else {
             // set title
-            [xmlNode searchXPath:@".//b" addTo:pubFields 
forKey:BDSKTitleString];
+            nodes = [node nodesForXPath:@".//b"];
+            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+                [pubFields setObject:string forKey:BDSKTitleString];
             // set authors
-            [xmlNode searchXPath:@".//i" addTo:pubFields 
forKey:BDSKAuthorString];
+            nodes = [node nodesForXPath:@".//i"];
+            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+                [pubFields setObject:string forKey:BDSKAuthorString];
             // to get year and report number
             pathToSearch = [url path];
         }

Modified: trunk/bibdesk/BDSKIEEEXploreParser.m
===================================================================
--- trunk/bibdesk/BDSKIEEEXploreParser.m        2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/BDSKIEEEXploreParser.m        2018-08-22 14:19:07 UTC (rev 
22509)
@@ -38,7 +38,7 @@
 #import "BDSKIEEEXploreParser.h"
 #import <WebKit/WebKit.h>
 #import "BibItem.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSError_BDSKExtensions.h"
 #import "NSArray_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
@@ -45,13 +45,13 @@
 #import "NSString_BDSKExtensions.h"
 
 // sometimes the link says AbstractPlus, sometimes it only says Abstract. This 
should catch both:
-static NSString *containsAbstractPlusLinkNode = 
@"./body//a[contains(lower-case(text()),'abstract')]";
+static NSString *containsAbstractPlusLinkNode = 
@"./body//a[contains(translate(text(),'ABSTRACT','abstract'),'abstract')]";
 static NSString *abstractPageURLPath = @"/xpls/abs_all.jsp";
 static NSString *searchResultPageURLPath = @"/search/srchabstract.jsp";
 
 @implementation BDSKIEEEXploreParser
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
     if (nil == [url host] || [[url host] 
isCaseInsensitiveEqual:@"ieeexplore.ieee.org"] == NO)
         return NO;
@@ -59,7 +59,7 @@
        if ([[url path] isCaseInsensitiveEqual:abstractPageURLPath] || [[url 
path] isCaseInsensitiveEqual:searchResultPageURLPath])
         return YES;
     
-    return [[[xmlDocument rootElement] 
nodesForXPath:containsAbstractPlusLinkNode error:NULL] count] > 0;
+    return [[[domDocument documentElement] 
nodesForXPath:containsAbstractPlusLinkNode] count] > 0;
 }
 
 - (NSArray *)itemsReturningError:(NSError **)outError {
@@ -86,10 +86,10 @@
         [abstractPageURLs addObject:url];
     } else {
         // parse all links on a TOC page
-        NSArray *abstractPlusLinkNodes = [[[self xmlDocument] rootElement] 
nodesForXPath:containsAbstractPlusLinkNode error:outError];
+        NSArray *abstractPlusLinkNodes = [[[self domDocument] documentElement] 
nodesForXPath:containsAbstractPlusLinkNode];
                
-        for (NSXMLNode *aplinknode in abstractPlusLinkNodes) {
-            NSString *hrefValue = [aplinknode stringValueOfAttribute:@"href"];
+        for (DOMElement *aplinknode in abstractPlusLinkNodes) {
+            NSString *hrefValue = [aplinknode getAttribute:@"href"];
             [abstractPageURLs addObject:[NSURL URLWithString:hrefValue 
relativeToURL:url]];
         }
     }

Modified: trunk/bibdesk/BDSKIUCrParser.m
===================================================================
--- trunk/bibdesk/BDSKIUCrParser.m      2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKIUCrParser.m      2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,6 @@
  */
 
 #import "BDSKIUCrParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
 
 
 @implementation BDSKIUCrParser
@@ -44,8 +43,8 @@
 
 + (NSString *)citationNodeXPath { return 
@"./body//table[@class='citation']//td/input[@name='cnor' and 
string-length(@value)!=0]"; }
 
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
-    NSString *cnorValue = [node stringValueOfAttribute:@"value"];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+    NSString *cnorValue = [(DOMElement *)node getAttribute:@"value"];
     return 
[@"//scripts.iucr.org/cgi-bin/biblio?Action=download&saveas=BIBTeX&cnor=" 
stringByAppendingString:cnorValue];
 }
 

Modified: trunk/bibdesk/BDSKInspireParser.m
===================================================================
--- trunk/bibdesk/BDSKInspireParser.m   2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKInspireParser.m   2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,7 @@
 #import "BDSKInspireParser.h"
 #import "BDSKBibTeXParser.h"
 #import "NSError_BDSKExtensions.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSURL_BDSKExtensions.h"
 
 
@@ -60,7 +60,7 @@
     return bibtexString;
 }
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
     if ([url host] == nil || [[[url host] lowercaseString] 
isEqualToString:@"inspirehep.net"] == NO)
         return NO;
     
@@ -67,8 +67,7 @@
     if ([url hasFirstPathComponent:@"record"])
         return YES;
     
-    NSError *error = nil;
-    NSUInteger nodeCount = [[[xmlDocument rootElement] nodesForXPath:[self 
citationNodeXPath] error:&error] count];
+    NSUInteger nodeCount = [[[domDocument documentElement] nodesForXPath:[self 
citationNodeXPath]] count];
     
     return nodeCount > 0;
 }
@@ -79,8 +78,8 @@
         NSMutableArray *items = [NSMutableArray array];
         NSString *bibtexString = nil;
         
-        NSArray *preNodes = [[[self xmlDocument] rootElement] 
nodesForXPath:@"./body/div/div/pre[contains(text(),'@')]" error:outError];
-        bibtexString = [[[[preNodes firstObject] stringValue] retain] 
autorelease];
+        NSArray *preNodes = [[[self domDocument] documentElement] 
nodesForXPath:@"./body/div/div/pre[contains(text(),'@')]"];
+        bibtexString = [[[[preNodes firstObject] textContent] retain] 
autorelease];
         
         NSArray *bibtexItems = nil;
         if (bibtexString)

Modified: trunk/bibdesk/BDSKJSTORWebParser.m
===================================================================
--- trunk/bibdesk/BDSKJSTORWebParser.m  2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKJSTORWebParser.m  2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,6 @@
  */
 
 #import "BDSKJSTORWebParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
 #import "NSURL_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
 
@@ -44,7 +43,7 @@
 
 @implementation BDSKJSTORWebParser
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
     if ([url hasDomain:@"jstor.org"] == NO)
         return NO;

Modified: trunk/bibdesk/BDSKMASParser.m
===================================================================
--- trunk/bibdesk/BDSKMASParser.m       2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKMASParser.m       2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,6 @@
 #import "BDSKMASParser.h"
 #import "BibItem.h"
 #import "NSString_BDSKExtensions.h"
-#import "NSXMLNode_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
 
 
@@ -47,8 +46,8 @@
 
 + (NSString *)citationNodeXPath { return 
@"./body//a[starts-with(@href,'../../UserInput/EditPublication?id=') or 
starts-with(@href,'Publication/') or starts-with(@href,'/Publication/')]"; }
 
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
-    NSString *href = [node stringValueOfAttribute:@"href"];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+    NSString *href = [(DOMElement *)node getAttribute:@"href"];
     
     NSString *pattern = 
@"^\\.\\./\\.\\./UserInput/EditPublication\\?id\\=([0-9]+)$";
     if ([href hasPrefix:@"Publication/"])

Modified: trunk/bibdesk/BDSKMathSciNetParser.m
===================================================================
--- trunk/bibdesk/BDSKMathSciNetParser.m        2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/BDSKMathSciNetParser.m        2018-08-22 14:19:07 UTC (rev 
22509)
@@ -50,7 +50,7 @@
 
 // MathSciNet is mirrored across different servers, don't use the server name 
to recognise the URL.
 // Instead recognise all URLs beginning with 'mathscinet', to match both 
general MatSciNet URLs like <https://www.ams.org/mathscinet/...>  and 
MathSciNet reference URLS <https://www.ams.org/mathscinet-getitem?...>.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
     if ([url hasFirstPathComponent:@"mathscinet"] == NO)
         return NO;
@@ -57,7 +57,7 @@
     
     AGRegex * MRRegexp = [AGRegex regexWithPattern:@"MR0*([0-9]+)" 
options:AGRegexMultiline];
     
-    return nil != [MRRegexp findInString:[xmlDocument XMLString]];
+    return nil != [MRRegexp findInString:[[domDocument documentElement] 
innerHTML]];
 }
 
 // Finds strings of type MR1234567 in the current page.
@@ -66,7 +66,7 @@
 - (NSArray *)itemsReturningError:(NSError **)outError {
 
        AGRegex * MRRegexp = [AGRegex regexWithPattern:@"MR0*([0-9]+)" 
options:AGRegexMultiline];
-       NSArray * regexpResults = [MRRegexp findAllInString:[[self xmlDocument] 
XMLString]];
+       NSArray * regexpResults = [MRRegexp findAllInString:[[[self 
domDocument] documentElement] innerHTML]];
        
     NSArray * requests = nil;
     

Modified: trunk/bibdesk/BDSKNumdamParser.m
===================================================================
--- trunk/bibdesk/BDSKNumdamParser.m    2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKNumdamParser.m    2018-08-22 14:19:07 UTC (rev 22509)
@@ -40,6 +40,7 @@
 #import "BDSKMathSciNetParser.h"
 #import "BDSKZentralblattParser.h"
 #import "BibItem.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSURL_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
 
@@ -47,13 +48,12 @@
 @implementation BDSKNumdamParser
 
 // Recognise Numdam pages by their server name ending in numdam.org.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
     if ([url hasDomain:@"numdam.org"] == NO)
         return NO;
     
-    NSError *error;
-    NSArray * tableCells = [[xmlDocument rootElement] 
nodesForXPath:@".//td[@id='contenu']" error:&error];
+    NSArray * tableCells = [[domDocument documentElement] 
nodesForXPath:@".//td[@id='contenu']"];
 
     return [tableCells count] > 0;
 }
@@ -62,9 +62,9 @@
 // (Support for MatSciNet is currently commented out as their lookup script 
requires online-style MR1234567 identifiers and NUMDAM uses paper-style 
identifiers a la 16,957b.)
 - (NSArray *)itemsReturningError:(NSError **)outError {
     
-    NSArray * tableCells = [[[self xmlDocument] rootElement] 
nodesForXPath:@".//td[@id='contenu']" error:outError];
-    NSXMLElement * tableCell = [tableCells objectAtIndex:0];
-    NSString * content = [tableCell stringValue];
+    NSArray * tableCells = [[[self domDocument] documentElement] 
nodesForXPath:@".//td[@id='contenu']"];
+    DOMNode * tableCell = [tableCells objectAtIndex:0];
+    NSString * content = [tableCell textContent];
     
     NSArray * rawReferences = [content componentsSeparatedByString:@"\n"];
     

Modified: trunk/bibdesk/BDSKProjectEuclidParser.m
===================================================================
--- trunk/bibdesk/BDSKProjectEuclidParser.m     2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/BDSKProjectEuclidParser.m     2018-08-22 14:19:07 UTC (rev 
22509)
@@ -40,6 +40,7 @@
 #import "BDSKMathSciNetParser.h"
 #import "BDSKZentralblattParser.h"
 #import "BibItem.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "NSURL_BDSKExtensions.h"
 #import <AGRegex/AGRegex.h>
 
@@ -47,13 +48,12 @@
 @implementation BDSKProjectEuclidParser
 
 // Recognise Project Euclid pages by their server name ending in 
projecteuclid.org.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
        
     if ([url hasDomain:@"projecteuclid.org"] == NO)
         return NO;
     
-    NSError *error;
-    NSArray * identifiers = [[xmlDocument rootElement] 
nodesForXPath:@".//div[@id='identifier']/p" error:&error];
+    NSArray * identifiers = [[domDocument documentElement] 
nodesForXPath:@".//div[@id='identifier']/p"];
 
        return [identifiers count] > 0;
 }
@@ -61,7 +61,7 @@
 // Find references for Mathematical Reviews and Zentralblatt Math in the page. 
Then look them up, giving preference to MSN if both are available.
 - (NSArray *)itemsReturningError:(NSError **)outError {
 
-       NSArray * identifiers = [[[self xmlDocument] rootElement] 
nodesForXPath:@".//div[@id='identifier']/p" error:outError];
+       NSArray * identifiers = [[[self domDocument] documentElement] 
nodesForXPath:@".//div[@id='identifier']/p"];
        
     NSArray *MRRequests = nil;
     NSArray *ZMathRequests = nil;
@@ -68,7 +68,7 @@
 
     if ( [identifiers count] ) {
         
-        NSXMLElement * identifier = [identifiers objectAtIndex:0];
+        DOMNode * identifier = [identifiers objectAtIndex:0];
         NSString * identifierString = [identifier stringValue];
         
         AGRegex * MRRegexp = [AGRegex regexWithPattern:@"MR([1-9][0-9]*)" 
options:0];
@@ -87,11 +87,11 @@
         }
         
         // Set up arrays for the lists of MathSciNet and Zentralblatt IDs. 
These will have the ID for the current element at position 0 and contain NSNull 
when no ID is found for the respective service.
-        NSArray * references = [[[self xmlDocument] rootElement] 
nodesForXPath:@".//div[@id='references']/div[@class='ref-block']" 
error:outError];
+        NSArray * references = [[[self domDocument] documentElement] 
nodesForXPath:@".//div[@id='references']/div[@class='ref-block']"];
         NSMutableArray * MRIDs = [NSMutableArray arrayWithObjects:myMRID, nil];
         NSMutableArray * ZMathIDs = [NSMutableArray 
arrayWithObjects:myZMathID, nil];
         
-        for (NSXMLElement * reference in references) {
+        for (DOMElement * reference in references) {
             NSString * referenceString = [reference stringValue];
 
             match = [MRRegexp findInString:referenceString];

Modified: trunk/bibdesk/BDSKSIAMParser.m
===================================================================
--- trunk/bibdesk/BDSKSIAMParser.m      2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKSIAMParser.m      2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,6 @@
  */
 
 #import "BDSKSIAMParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
 
 
 @implementation BDSKSIAMParser
@@ -44,8 +43,8 @@
 
 + (NSString *)citationNodeXPath { return @"./body//a[text()='Download 
Citations']"; }
 
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
-    NSMutableString *hrefValue = [[[node stringValueOfAttribute:@"href"] 
mutableCopy] autorelease];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+    NSMutableString *hrefValue = [[[(DOMElement *)node getAttribute:@"href"] 
mutableCopy] autorelease];
     NSRange range = [hrefValue rangeOfString:@"showCitFormats" 
options:NSCaseInsensitiveSearch];
     if (range.location != NSNotFound)
         [hrefValue replaceCharactersInRange:range 
withString:@"downloadCitation"];

Modified: trunk/bibdesk/BDSKScienceDirectParser.m
===================================================================
--- trunk/bibdesk/BDSKScienceDirectParser.m     2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/BDSKScienceDirectParser.m     2018-08-22 14:19:07 UTC (rev 
22509)
@@ -37,14 +37,13 @@
  */
 
 #import "BDSKScienceDirectParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
 
 @implementation BDSKScienceDirectParser
 
 + (NSString *)citationNodeXPath { return @"./head/meta[@name='citation_pii']"; 
}
 
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
-    NSString *piiValue = [node stringValueOfAttribute:@"content"];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+    NSString *piiValue = [(DOMElement *)node getAttribute:@"content"];
     return [NSString 
stringWithFormat:@"https://www.sciencedirect.com/sdfe/arp/cite?pii=%@&format=text%%2Fx-bibtex&withabstract=true",
 piiValue];
 }
 

Modified: trunk/bibdesk/BDSKSpringerParser.m
===================================================================
--- trunk/bibdesk/BDSKSpringerParser.m  2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKSpringerParser.m  2018-08-22 14:19:07 UTC (rev 22509)
@@ -40,7 +40,7 @@
 #import "BibItem.h"
 #import "NSError_BDSKExtensions.h"
 #import "NSArray_BDSKExtensions.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
 #import "BDSKBibTeXParser.h"
 #import <AGRegex/AGRegex.h>
 
@@ -48,35 +48,21 @@
 // does the rest actually still work?
 // was for path = /contents/...
 @interface BDSKSpringerParser (BDSKPrivate)
-+ (BibItem *)newItemFromXMLDocument:(NSXMLDocument *)xmlDocument 
fromURL:(NSURL *)url error:(NSError **)outError;
-+ (NSString *)authorStringFromXMLNode:(NSXMLNode *)xmlNode 
searchXPath:(NSString *)xPath;
++ (BibItem *)newItemFromDocument:(DOMDocument *)domDocument fromURL:(NSURL 
*)url error:(NSError **)outError;
 @end
 
 
 @implementation BDSKSpringerParser
 
-+ (NSString *)citationNodeXPath { return @"./body//a[@data-gtmlabel='BIB']"; }
-
-+ (NSString *)authorStringFromXMLNode:(NSXMLNode *)xmlNode 
searchXPath:(NSString *)xPath {
-       NSError *error = nil;
-       NSArray *authorNodes = [xmlNode nodesForXPath:xPath error:&error];
-    NSMutableArray *authorStrings = [NSMutableArray array];
-    NSXMLNode *node;
-    for (node in authorNodes) {
-        [authorStrings addObject:[node stringValue]];
-    }
-       return [authorStrings componentsJoinedByAnd];
-}
-
-+ (BibItem *)newItemFromXMLDocument:(NSXMLDocument *)xmlDocument 
fromURL:(NSURL *)url error:(NSError **)outError{
++ (BibItem *)newItemFromDocument:(DOMDocument *)domDocument fromURL:(NSURL 
*)url error:(NSError **)outError{
     
-       NSXMLNode *xmlNode = [xmlDocument rootElement];
+       DOMNode *node = [domDocument documentElement];
        NSMutableDictionary *pubFields = [NSMutableDictionary dictionary];
        NSMutableArray *filesArray = nil;
     
     NSString *pubType = BDSKMiscString;
     // set publication type
-    NSString *pubTypeGuess = [xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a/@title" addTo:nil forKey:nil];
+    NSString *pubTypeGuess = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a/@title"] firstObject] stringValue];
     if (pubTypeGuess != nil) {
         if ([pubTypeGuess isEqualToString:@"Link to the Book of this 
Chapter"]) {
             pubType = BDSKChapterString;
@@ -88,18 +74,28 @@
     }
     
        // set title
-       [xmlNode searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/h1" addTo:pubFields forKey:BDSKTitleString];
-       // set book or journal
+    NSString *title = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/h1"] firstObject] stringValue];
+    if (title != nil)
+        [pubFields setObject:title forKey:BDSKTitleString];
+       
+    // set book or journal
     if ([pubType isEqualToString:BDSKChapterString]) {
-        [xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a" addTo:pubFields 
forKey:BDSKBooktitleString];
+        NSString *chapter = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a"] firstObject] stringValue];
+        if (chapter != nil)
+            [pubFields setObject:chapter forKey:BDSKBooktitleString];
     } else if ([pubType isEqualToString:BDSKArticleString]) {
-        [xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a" addTo:pubFields 
forKey:BDSKJournalString];
+        NSString *journal = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a"] firstObject] stringValue];
+        if (journal != nil)
+            [pubFields setObject:journal forKey:BDSKJournalString];
     }
+    
        // set DOI and store for later use
-       NSString *doi = [xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']//span[@class='doi']/span[@class='value']" addTo:pubFields 
forKey:BDSKDoiString];
+    NSString *doi = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']//span[@class='doi']/span[@class='value']"] firstObject] 
stringValue];
+    if (doi != nil)
+        [pubFields setObject:doi forKey:BDSKDoiString];
 
        // set pages
-       NSString *pages = [xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']//span[@class='pagination']" addTo:pubFields 
forKey:BDSKPagesString];
+    NSString *pages = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']//span[@class='pagination']"] firstObject] stringValue];
     if (pages != nil) {
         AGRegex *pagesRegex = [AGRegex 
regexWithPattern:@"^([0-9]*)-([0-9]*)?"];
         AGRegexMatch *match = [pagesRegex findInString:pages];
@@ -112,19 +108,27 @@
             [page appendString:endPage];
             [pubFields setObject:page forKey:BDSKPagesString];
             [page release];
+        } else {
+            [pubFields setObject:pages forKey:BDSKPagesString];
         }
     }
        // set authors
-       [pubFields setValue:[BDSKSpringerParser authorStringFromXMLNode:xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/p[@class='authors']/a"] forKey:BDSKAuthorString];
+    NSString *authors = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/p[@class='authors']/a"] 
valueForKey:@"stringValue"] componentsJoinedByAnd];
+    if (authors != nil)
+        [pubFields setValue:authors forKey:BDSKAuthorString];
        // set editors
-       [pubFields setValue:[BDSKSpringerParser authorStringFromXMLNode:xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/p[@class='editors']/a"] forKey:BDSKEditorString];
+    NSString *editors = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/p[@class='editors']/a"] 
valueForKey:@"stringValue"] componentsJoinedByAnd];
+    if (editors != nil)
+        [pubFields setValue:editors forKey:BDSKEditorString];
        // set series
     if ([pubType isEqualToString:BDSKChapterString]) {
-        [xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='secondary']/a" addTo:pubFields 
forKey:BDSKSeriesString];
+        NSString *series = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='secondary']/a"] firstObject] stringValue];
+        if (series != nil)
+            [pubFields setObject:series forKey:BDSKSeriesString];
     }
     
     // volume, number, and year
-    NSString *vyString = [xmlNode 
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='secondary']" addTo:nil forKey:nil];
+    NSString *vyString = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='secondary']"] firstObject] stringValue];
     if (vyString != nil) {
         // parse volume number
                AGRegex *volRegex = [AGRegex regexWithPattern:@"Volume 
([0-9]*)[^0-9]"];
@@ -171,6 +175,8 @@
     
 }
 
++ (NSString *)citationNodeXPath { return @"./body//a[@data-gtmlabel='BIB']"; }
+
 + (NSString *)name {return @"SpringerLink"; }
 
 + (NSString *)address { return @"https://link.springer.com/"; }

Modified: trunk/bibdesk/BDSKWebParser.h
===================================================================
--- trunk/bibdesk/BDSKWebParser.h       2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKWebParser.h       2018-08-22 14:19:07 UTC (rev 22509)
@@ -53,7 +53,6 @@
 
 @interface BDSKWebParser : NSObject {
     DOMDocument *domDocument;
-    NSXMLDocument *xmlDocument;
     NSURL *URL;
     id<BDSKWebParserDelegate> delegate;
     BOOL finishedStarting;
@@ -71,7 +70,6 @@
 #pragma mark Concrete web parser
 
 @property (nonatomic, readonly) DOMDocument *domDocument;
-@property (nonatomic, readonly) NSXMLDocument *xmlDocument;
 @property (nonatomic, readonly) NSURL *URL;
 
 @property (nonatomic, readonly) id<BDSKWebParserDelegate> delegate;
@@ -79,7 +77,7 @@
 // set at the end of -start, to know thereis not more coming, so we may finish
 @property (nonatomic) BOOL finishedStarting;
 
-- (id)initWithDocument:(DOMDocument *)aDomDocument xmlDocument:(NSXMLDocument 
*)aXmlDocument fromURL:(NSURL *)aURL;
+- (id)initWithDocument:(DOMDocument *)aDomDocument fromURL:(NSURL *)aURL;
 
 - (void)startWithDelegate:(id<BDSKWebParserDelegate>)aDelegate;
 - (void)cancel;
@@ -88,7 +86,7 @@
 - (BOOL)canFinishWithItems:(NSArray *)items success:(BOOL *)success;
 
 // this must be implemented by subclasses
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url;
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url;
 
 // main method for subclasses to implement
 - (NSArray *)itemsReturningError:(NSError **)outError;

Modified: trunk/bibdesk/BDSKWebParser.m
===================================================================
--- trunk/bibdesk/BDSKWebParser.m       2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKWebParser.m       2018-08-22 14:19:07 UTC (rev 22509)
@@ -65,7 +65,7 @@
 
 @implementation BDSKWebParser
 
-@synthesize domDocument, xmlDocument, URL, delegate, finishedStarting;
+@synthesize domDocument, URL, delegate, finishedStarting;
 
 + (NSArray *)parsers {
     static NSArray *webParsers = nil;
@@ -98,6 +98,7 @@
 
 // entry point for web group
 + (BDSKWebParser *)parserForDocument:(DOMDocument *)domDocument fromURL:(NSURL 
*)url error:(NSError **)outError{
+    /*
     NSError *error = nil;
     
     NSString *htmlString = [(id)[domDocument documentElement] outerHTML];
@@ -118,10 +119,11 @@
         if(outError) *outError = error;
         return nil;
     }
+    */
     
     Class parserClass = Nil;
     for (parserClass in [self parsers]) {
-        if ([parserClass canParseDocument:domDocument xmlDocument:xmlDoc 
fromURL:url])
+        if ([parserClass canParseDocument:domDocument fromURL:url])
             break;
     }
     
@@ -136,7 +138,7 @@
     
     BDSKASSERT([parserClass isSubclassOfClass:[BDSKWebParser class]]);
 
-    return [[[parserClass alloc] initWithDocument:domDocument 
xmlDocument:xmlDoc fromURL:url] autorelease];
+    return [[[parserClass alloc] initWithDocument:domDocument fromURL:url] 
autorelease];
 }
 
 + (NSArray *)parsersForFeature:(BDSKParserFeature)feature {
@@ -156,8 +158,8 @@
     return nil;
 }
 
-- (id)initWithDocument:(DOMDocument *)aDomDocument xmlDocument:(NSXMLDocument 
*)aXmlDocument fromURL:(NSURL *)aURL {
-    if (aDomDocument == nil || aXmlDocument == nil || aURL == nil) {
+- (id)initWithDocument:(DOMDocument *)aDomDocument fromURL:(NSURL *)aURL {
+    if (aDomDocument == nil || aURL == nil) {
         [self release];
         return nil;
     }
@@ -164,7 +166,6 @@
     self = [super init];
     if (self) {
         domDocument = [aDomDocument retain];
-        xmlDocument = [aXmlDocument retain];
         URL = [aURL retain];
     }
     return self;
@@ -174,7 +175,6 @@
 - (void)dealloc {
     delegate = nil;
     BDSKDESTROY(domDocument);
-    BDSKDESTROY(xmlDocument);
     BDSKDESTROY(URL);
     [super dealloc];
 }
@@ -215,7 +215,7 @@
 
 - (NSArray *)itemsReturningError:(NSError **)outError { return nil; }
 
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url { return NO; }
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url { 
return NO; }
 
 + (NSString *)name {
     NSString *name = NSStringFromClass(self);

Modified: trunk/bibdesk/BDSKZentralblattParser.m
===================================================================
--- trunk/bibdesk/BDSKZentralblattParser.m      2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/BDSKZentralblattParser.m      2018-08-22 14:19:07 UTC (rev 
22509)
@@ -50,7 +50,7 @@
 
 // Zentralblatt Math is mirrored across several servers. See 
http://www.zentralblatt-math.org/zmath/en/mirrors/ .
 // Accept URLs whose path begins with zmath. As sometimes paths begin with 
multiple slashes, trim those first.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument 
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
     
     if ([url hasFirstPathComponent:@"zmath"] == NO)
         return NO;
@@ -57,7 +57,7 @@
     
     AGRegex *ZMathRegexp = [AGRegex regexWithPattern:@"(Zbl|JFM) 
(pre)?([0-9.]*)" options:AGRegexMultiline];
 
-       return nil != [ZMathRegexp findInString:[xmlDocument XMLString]];
+       return nil != [ZMathRegexp findInString:[[domDocument documentElement] 
innerHTML]];
 }
 
 // Find occurrences of strings Zbl [pre]1234.56789 or JFM 12.3456.78 on the 
page.
@@ -66,7 +66,7 @@
 - (NSArray *)itemsReturningError:(NSError **)outError {
 
        AGRegex *ZMathRegexp = [AGRegex regexWithPattern:@"(Zbl|JFM) 
(pre)?([0-9.]*)" options:AGRegexMultiline];
-       NSArray * regexpResults = [ZMathRegexp findAllInString:[[self 
xmlDocument] XMLString]];
+       NSArray * regexpResults = [ZMathRegexp findAllInString:[[[self 
domDocument] documentElement] innerHTML]];
        
     NSArray * requests = nil;
     
@@ -113,7 +113,7 @@
     // If the referring URL's path begins with '/zmath', assume we are using a 
Zentralblatt mirror server before and continue using that.
     // If not, use the default server instead.
     NSString * serverName = [[referrer host] lowercaseString];
-    if ( [BDSKZentralblattParser canParseDocument:nil xmlDocument:nil 
fromURL:referrer] ) {
+    if ( [BDSKZentralblattParser canParseDocument:nil fromURL:referrer] ) {
         if ( [[referrer path] rangeOfString:@"/zmath/ZMATH"].location != 
NSNotFound ) {
             // some mirrors' paths begin with /ZMATH, add that
             serverName = [serverName stringByAppendingString:@"/ZMATH"];

Modified: trunk/bibdesk/DOMNode_BDSKExtensions.h
===================================================================
--- trunk/bibdesk/DOMNode_BDSKExtensions.h      2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/DOMNode_BDSKExtensions.h      2018-08-22 14:19:07 UTC (rev 
22509)
@@ -42,14 +42,11 @@
 @interface DOMNode (BDSKExtensions)
 
 - (NSArray *)nodesForXPath:(NSString *)xpath;
+
 - (NSString *)stringValue;
-- (NSString *)stringValueOfAttribute:(NSString *)attrName;
+- (NSString *)stringValuePreservingBreaks;
+- (NSString *)fullStringValueIfABBR;
 - (NSArray *)descendantOrSelfNodesWithClassName:(NSString *)className;
-- (BOOL)hasParentWithClassName:(NSString *)class;
-- (NSArray *)classNames;
-- (NSString *)fullStringValueIfABBR;
-- (NSString *)searchXPath:(NSString *)searchPath addTo:(NSMutableDictionary 
*)dict forKey:(NSString *)key;
-- (NSString *)searchXPath:(NSString *)searchPath addTo:(NSMutableDictionary 
*)dict forKey:(NSString *)key last:(BOOL)last;
-- (NSString *)textStringValue;
+- (BOOL)hasParentWithClassName:(NSString *)className;
 
 @end

Modified: trunk/bibdesk/DOMNode_BDSKExtensions.m
===================================================================
--- trunk/bibdesk/DOMNode_BDSKExtensions.m      2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/DOMNode_BDSKExtensions.m      2018-08-22 14:19:07 UTC (rev 
22509)
@@ -43,135 +43,74 @@
 
 - (NSArray *)nodesForXPath:(NSString *)xpath {
     DOMXPathResult *result = [[self ownerDocument] evaluate:xpath 
contextNode:self resolver:nil type:DOM_ANY_TYPE inResult:nil];
-    DOMNode *node;
+    DOMNode *node = [result iterateNext];
     NSMutableArray *nodes = nil;
     
-    while ((node = [result iterateNext])) {
-        if (nodes == nil)
-            nodes = [NSMutableArray array];
-        [nodes addObject:node];
+    if (node) {
+        nodes = [NSMutableArray array];
+        do {
+            [nodes addObject:node];
+        } while ((node = [result iterateNext]));
     }
     
     return nodes;
 }
 
+// DOM keeps all spaces and newlines as in the html, rather than as they are 
interpreted
 - (NSString *)stringValue {
-    return [self textContent];
+    return [[self textContent] 
stringByCollapsingWhitespaceAndNewlinesAndRemovingSurroundingWhitespaceAndNewlines]
 ?: @"";
 }
 
-- (NSString *)stringValueOfAttribute:(NSString *)attrName {
-    NSString *path = [NSString stringWithFormat:@"./@%@", attrName];
-    NSArray *atts = [self nodesForXPath:path];
-    if ([atts count] == 0) return nil;
-    return [[atts objectAtIndex:0] textContent];
-}
-
-- (NSArray *)descendantOrSelfNodesWithClassName:(NSString *)className {
-    NSString *path = [NSString stringWithFormat:@".//*[contains(concat(' ', 
normalize-space(@class), ' '), ' %@ ')]", className];
-    NSArray *ar = [self nodesForXPath:path];
-    return ar;
-}
-
-- (BOOL)hasParentWithClassName:(NSString *)className {
+- (NSString *)stringValuePreservingBreaks {
+    DOMNodeList *children = [self childNodes];
+    NSUInteger i, iMax = [children length];
     
-    DOMNode *parent = [self parentNode];
+    if (iMax == 0)
+        return [self stringValue];
     
-    do {
-        if ([parent nodeType] != DOM_ELEMENT_NODE) return NO; // handles root 
node
-        
-        if ([[parent classNames] containsObject:className])
-            return YES;
-        
-    } while ((parent = [parent parentNode]));
+    NSMutableString *string = [NSMutableString string];
     
-    return NO;
+    for (i = 0; i < iMax; i++) {
+        DOMNode *node = [children item:i];
+        short type = [node nodeType];
+        if (type != DOM_ELEMENT_NODE && type != DOM_TEXT_NODE) continue;
+        if (type == DOM_ELEMENT_NODE && [[node nodeName] 
isCaseInsensitiveEqual:@"br"]) {
+            [string appendString:@"\n"];
+        } else {
+            NSString *s = [node stringValuePreservingBreaks];
+            if ([s length]) {
+                if ([string length] && [string lastCharacter] != '\n' && 
[string firstCharacter] != '\n')
+                    [string appendString:@" "];
+                [string appendString:s];
+            }
+        }
+    }
+    return string;
 }
 
-- (NSArray *)classNames {
-    
-    if([self nodeType] != DOM_ELEMENT_NODE) [NSException 
raise:NSInvalidArgumentException format:@"wrong node kind"];
-    
-    NSMutableArray *array = [NSMutableArray arrayWithCapacity:0];
-    NSError *err = nil;
-    NSArray *classNodes = [self nodesForXPath:@"@class"];
-    
-    if ([classNodes count] == 0)
-        return array;
-    
-    NSAssert ([classNodes count] == 1, @"too many nodes in classNodes");
-    
-    NSXMLNode *classNode = [classNodes objectAtIndex:0];
-    
-    [array addObjectsFromArray:[[classNode stringValue] 
componentsSeparatedByString:@" "]];
-    
-    return array;
-}
-
 - (NSString *)fullStringValueIfABBR {
-    NSError *err;
-    if([self nodeType] != DOM_ELEMENT_NODE) [NSException 
raise:NSInvalidArgumentException format:@"wrong node kind"];
-    
-    if ([[[self nodeName] lowercaseString] isEqualToString:@"abbr"]){
+    if ([[self nodeName] isCaseInsensitiveEqual:@"abbr"]){
         //todo: will need more robust comparison for namespaced node titles.
-        
         // return value of title attribute instead
-        NSArray *titleNodes = [self nodesForXPath:@"@title"];
-        if ([titleNodes count] > 0)
-            return [[titleNodes firstObject] stringValue];
+        NSString *title = [(DOMElement *)self getAttribute:@"title"];
+        if (title)
+            return title;
     }
-    
     return [self stringValue];
 }
 
-- (NSString *)searchXPath:(NSString *)searchPath addTo:(NSMutableDictionary 
*)dict forKey:(NSString *)key {
-    return [self searchXPath:searchPath addTo:dict forKey:key last:NO];
+- (NSArray *)descendantOrSelfNodesWithClassName:(NSString *)className {
+    return [self nodesForXPath:[NSString 
stringWithFormat:@".//*[contains(concat(' ',normalize-space(@class),' '),' %@ 
')]", className]];
 }
 
-- (NSString *)searchXPath:(NSString *)searchPath addTo:(NSMutableDictionary 
*)dict forKey:(NSString *)key last:(BOOL)last {
-    NSArray *nodes = [self nodesForXPath:searchPath];
-    NSString *string = nil;
-    
-    if (nil != nodes && 0 < [nodes count]) {
-        string = [[nodes objectAtIndex:last ? ([nodes count] - 1) : 0] 
stringValue];
-        if (string) {
-            string = [string stringByRemovingSurroundingWhitespaceAndNewlines];
-            [dict setValue:string forKey:key];
-        }
-    }
-    return string;
+- (BOOL)hasParentWithClassName:(NSString *)className {
+    DOMNode *parent = [self parentNode];
+    do {
+        if ([parent nodeType] != DOM_ELEMENT_NODE) return NO; // handles root 
node
+        if ([[[(DOMElement *)parent getAttribute:@"class"] 
componentsSeparatedByString:@" "] containsObject:className])
+            return YES;
+    } while ((parent = [parent parentNode]));
+    return NO;
 }
 
-static void appendTextNodes(NSMutableString *string, DOMNode *node) {
-    if ([node nodeType] == DOM_TEXT_NODE)
-        [string appendString:[node stringValue]];
-    else if ([[[node nodeName] lowercaseString] isEqualToString:@"script"] == 
NO) {
-        DOMNodeList *children = [node childNodes];
-        unsigned i, iMax = [children length];
-        for (i = 0; i < iMax; i++)
-            appendTextNodes(string, [children item:i]);
-    }
-}
-
-- (NSString *)textStringValue {
-    NSMutableString *text = [NSMutableString string];
-    appendTextNodes(text, self);
-    return text;
-}
-
 @end
-
-
-@interface DOMDocument (BDSKExtensions)
-@end
-
-@implementation DOMDocument (BDSKExtensions)
-
-- (NSString *)textStringValue {
-    NSError *error;
-    NSArray *body = [self nodesForXPath:@"./html/body"];
-    if ([body count] == 1)
-        return [[body firstObject] textStringValue];
-    return [[self documentElement] textStringValue];
-}
-
-@end

Modified: trunk/bibdesk/NSString_BDSKExtensions.h
===================================================================
--- trunk/bibdesk/NSString_BDSKExtensions.h     2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/NSString_BDSKExtensions.h     2018-08-22 14:19:07 UTC (rev 
22509)
@@ -463,7 +463,7 @@
 - (NSString *)stringByRemovingSurroundingWhitespace;
 - (NSString *)stringByCollapsingWhitespaceAndRemovingSurroundingWhitespace;
 - (NSString *)stringByRemovingSurroundingWhitespaceAndNewlines;
-
+- (NSString 
*)stringByCollapsingWhitespaceAndNewlinesAndRemovingSurroundingWhitespaceAndNewlines;
 - (NSString *)fullyEncodeAsIURI;
 
 - (NSString *)stringByRemovingAliens;

Modified: trunk/bibdesk/NSString_BDSKExtensions.m
===================================================================
--- trunk/bibdesk/NSString_BDSKExtensions.m     2018-08-22 06:30:25 UTC (rev 
22508)
+++ trunk/bibdesk/NSString_BDSKExtensions.m     2018-08-22 14:19:07 UTC (rev 
22509)
@@ -1340,6 +1340,10 @@
     return [self stringByCollapsingAndTrimmingCharactersInSet:[NSCharacterSet 
whitespaceCharacterSet]];
 }
 
+- (NSString 
*)stringByCollapsingWhitespaceAndNewlinesAndRemovingSurroundingWhitespaceAndNewlines
 {
+    return [self stringByCollapsingAndTrimmingCharactersInSet:[NSCharacterSet 
whitespaceAndNewlineCharacterSet]];
+}
+
 // This method is copied and modified from NSString-OFStringExtensions.m
 - (NSString *)fullyEncodeAsIURI {
     static const char hexDigits[16] = {'0', '1', '2', '3', '4', '5', '6', '7', 
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit

Reply via email to