Revision: 22510
          http://sourceforge.net/p/bibdesk/svn/22510
Author:   hofman
Date:     2018-08-22 21:59:05 +0000 (Wed, 22 Aug 2018)
Log Message:
-----------
Catch exceptions from XPath parsing. Add a method to get nodes by evaluating a
precompiled XPath expression, so the expression can be reused.

Modified Paths:
--------------
    trunk/bibdesk/BDSKArxivParser.m
    trunk/bibdesk/BDSKIACRParser.m
    trunk/bibdesk/DOMNode_BDSKExtensions.h
    trunk/bibdesk/DOMNode_BDSKExtensions.m

Modified: trunk/bibdesk/BDSKArxivParser.m
===================================================================
--- trunk/bibdesk/BDSKArxivParser.m     2018-08-22 14:19:07 UTC (rev 22509)
+++ trunk/bibdesk/BDSKArxivParser.m     2018-08-22 21:59:05 UTC (rev 22510)
@@ -75,59 +75,77 @@
     BOOL isAbstract = [url hasFirstPathComponent:@"abs"];
     BOOL isSearch = [url hasFirstPathComponent:@"search"];
     BOOL isList = [url hasFirstPathComponent:@"list"];
+    
+    NSString *arxivSearchResultNodePath = nil;
 
-    NSString *arxivSearchResultNodePath = nil;
-    NSString *arxivLinkNodePath;
-    NSString *arxivIDNodePath;
-    NSString *pdfURLNodePath;
-    NSString *doiNodePath;
-    NSString *titleNodePath;
-    NSString *authorsNodePath;
-    NSString *journalNodePath;
-    NSString *abstractNodePath;
-    
-    if (isAbstract) {
+    if (isAbstract)
         arxivSearchResultNodePath = 
@"./body/div[@id='content']/div[@id='abs']/div[@class='leftcolumn']";
-        
-        arxivLinkNodePath = 
@"./div[@class='metatable']/table//td[@class='tablecell arxivid']";
-        arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
-        
-        pdfURLNodePath = 
@"../div[@class='extra-services']/div[@class='full-text']/ul/li/a[contains(text(),'PDF')]";
-        doiNodePath = @"./div[@class='metatable']/table//td[@class='tablecell 
doi']/a";
-        
-        titleNodePath = @"./h1[contains(concat(' ',normalize-space(@class),' 
'),' title ')]/span/following-sibling::text()";
-        authorsNodePath = @"./div[@class='authors']/a";
-        journalNodePath = 
@"./div[@class='metatable']/table//td[@class='tablecell jref']";
-        abstractNodePath = @"./blockquote[contains(concat(' 
',normalize-space(@class),' '),' abstract ')]";
-    } else if (isSearch) {
+    else if (isSearch)
         arxivSearchResultNodePath = 
@"./body//ol[@class='breathe-horizontal']/li[@class='arxiv-result']";
-        
-        arxivLinkNodePath = @"./div[contains(concat(' 
',normalize-space(@class),' '),' level ')]/p[contains(concat(' 
',normalize-space(@class),' '),' list-title ')]";
-        arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
-        
-        pdfURLNodePath = @"./span/a[contains(text(),'pdf')]";
-        doiNodePath = 
@"../div/div/div/span[contains(text(),'doi')]/following-sibling::span/a";
-        
-        titleNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' title ')]";
-        authorsNodePath = @"./p[@class='authors']/a";
-        journalNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' comments ')]/span[text()='Journal ref:']/following-sibling::text()";
-        abstractNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),' 
abstract-full ')]";
-    } else if (isList) {
+    else if (isList)
         arxivSearchResultNodePath = @"./body/div[@id='content']/div/dl/dt";
+    else
+        return nil;
         
-        arxivLinkNodePath = @"./span[@class='list-identifier']";
-        arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
-        
-        pdfURLNodePath = @"./a[contains(text(),'pdf')]";
-        doiNodePath = 
@"./div[@class='list-doi']/span/following-sibling::text()";
-        
-        titleNodePath = @"./div[contains(concat(' ',normalize-space(@class),' 
'),' list-title ')]/span/following-sibling::text()";
-        authorsNodePath = @"./div[@class='list-authors']/a";
-        journalNodePath = 
@"./div[@class='list-journal-ref']/span/following-sibling::text()";
-        abstractNodePath = @"./p";
-    } else {
-        return  nil;
+    // fetch the arxiv search results
+    NSArray *arxivSearchResults = [[[self domDocument] documentElement] 
nodesForXPath:arxivSearchResultNodePath];
+    
+    if ([arxivSearchResults count] == 0)
+        return nil;
+    
+    DOMXPathExpression *arxivLinkNodePath = nil;
+    DOMXPathExpression *arxivIDNodePath = nil;
+    DOMXPathExpression *pdfURLNodePath = nil;
+    DOMXPathExpression *doiNodePath = nil;
+    DOMXPathExpression *arxivNodePath = nil;
+    DOMXPathExpression *titleNodePath = nil;
+    DOMXPathExpression *authorsNodePath = nil;
+    DOMXPathExpression *journalNodePath = nil;
+    DOMXPathExpression *abstractNodePath = nil;
+    
+    @try {
+        if (isAbstract) {
+            arxivLinkNodePath = [[self domDocument] 
createExpression:@"./div[@class='metatable']/table//td[@class='tablecell 
arxivid']" resolver:nil];
+            arxivIDNodePath = [[self domDocument] 
createExpression:@"./a[contains(text(),'arXiv:')]" resolver:nil];
+            
+            pdfURLNodePath = [[self domDocument] 
createExpression:@"../div[@class='extra-services']/div[@class='full-text']/ul/li/a[contains(text(),'PDF')]"
 resolver:nil];
+            doiNodePath = [[self domDocument] 
createExpression:@"./div[@class='metatable']/table//td[@class='tablecell 
doi']/a" resolver:nil];
+            
+            titleNodePath = [[self domDocument] 
createExpression:@"./h1[contains(concat(' ',normalize-space(@class),' '),' 
title ')]/span/following-sibling::text()" resolver:nil];
+            authorsNodePath = [[self domDocument] 
createExpression:@"./div[@class='authors']/a" resolver:nil];
+            journalNodePath = [[self domDocument] 
createExpression:@"./div[@class='metatable']/table//td[@class='tablecell 
jref']" resolver:nil];
+            abstractNodePath = [[self domDocument] 
createExpression:@"./blockquote[contains(concat(' ',normalize-space(@class),' 
'),' abstract ')]" resolver:nil];
+        } else if (isSearch) {
+            arxivLinkNodePath = [[self domDocument] 
createExpression:@"./div[contains(concat(' ',normalize-space(@class),' '),' 
level ')]/p[contains(concat(' ',normalize-space(@class),' '),' list-title ')]" 
resolver:nil];
+            arxivIDNodePath = [[self domDocument] 
createExpression:@"./a[contains(text(),'arXiv:')]" resolver:nil];
+            
+            pdfURLNodePath = [[self domDocument] 
createExpression:@"./span/a[contains(text(),'pdf')]" resolver:nil];
+            doiNodePath = [[self domDocument] 
createExpression:@"../div/div/div/span[contains(text(),'doi')]/following-sibling::span/a"
 resolver:nil];
+            
+            titleNodePath = [[self domDocument] 
createExpression:@"./p[contains(concat(' ',normalize-space(@class),' '),' title 
')]" resolver:nil];
+            authorsNodePath = [[self domDocument] 
createExpression:@"./p[@class='authors']/a" resolver:nil];
+            journalNodePath = [[self domDocument] 
createExpression:@"./p[contains(concat(' ',normalize-space(@class),' '),' 
comments ')]/span[text()='Journal ref:']/following-sibling::text()" 
resolver:nil];
+            abstractNodePath = [[self domDocument] 
createExpression:@"./p[contains(concat(' ',normalize-space(@class),' '),' 
abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),' 
abstract-full ')]" resolver:nil];
+        } else if (isList) {
+            arxivLinkNodePath = [[self domDocument] 
createExpression:@"./span[@class='list-identifier']" resolver:nil];
+            arxivIDNodePath = [[self domDocument] 
createExpression:@"./a[contains(text(),'arXiv:')]" resolver:nil];
+            
+            pdfURLNodePath = [[self domDocument] 
createExpression:@"./a[contains(text(),'pdf')]" resolver:nil];
+            doiNodePath = [[self domDocument] 
createExpression:@"./div[@class='list-doi']/span/following-sibling::text()" 
resolver:nil];
+            
+            arxivNodePath = [[self domDocument] 
createExpression:@"./following-sibling::dd/div[@class='meta']" resolver:nil];
+
+            titleNodePath = [[self domDocument] 
createExpression:@"./div[contains(concat(' ',normalize-space(@class),' '),' 
list-title ')]/span/following-sibling::text()" resolver:nil];
+            authorsNodePath = [[self domDocument] 
createExpression:@"./div[@class='list-authors']/a" resolver:nil];
+            journalNodePath = [[self domDocument] 
createExpression:@"./div[@class='list-journal-ref']/span/following-sibling::text()"
 resolver:nil];
+            abstractNodePath = [[self domDocument] createExpression:@"./p" 
resolver:nil];
+        } else {
+            return  nil;
+        }
     }
+    @catch (id e) {
+        NSLog(@"Ignoring XPath exception: %@", e);
+    }
     
     AGRegex *eprintRegex1 = [AGRegex 
regexWithPattern:@"([0-9]{2})([0-9]{2})\\.([0-9]{4})"
                                               options:AGRegexMultiline];
@@ -141,12 +159,6 @@
     AGRegex *journalRegex3 = [AGRegex 
regexWithPattern:@"(.+[^0-9])([0-9]+):(.*),([0-9]{4})"
                                                options:AGRegexMultiline];
     
-    // fetch the arxiv search results
-    NSArray *arxivSearchResults = nil;
-    DOMElement *rootElement = [[self domDocument] documentElement];
-    
-    arxivSearchResults = [rootElement nodesForXPath:arxivSearchResultNodePath];
-    
     BOOL addLinkedFiles = NO == [[NSUserDefaults standardUserDefaults] 
boolForKey:BDSKUseLocalUrlAndUrlKey];
     
     NSMutableArray *items = [NSMutableArray arrayWithCapacity:0];
@@ -154,7 +166,7 @@
     for (DOMNode *arxivSearchResult in arxivSearchResults) {
         
         // fetch the arxiv links
-        NSArray *nodes = [arxivSearchResult nodesForXPath:arxivLinkNodePath];
+        NSArray *nodes = [arxivSearchResult 
nodesForXPathExpression:arxivLinkNodePath];
         
         if (1 != [nodes count]) {
             // If arXiv ever start providing multiple alternative bibtex links 
for a
@@ -171,7 +183,7 @@
         NSString *string = nil;
         
         // search for arXiv ID
-        nodes = [arxivNode nodesForXPath:arxivIDNodePath];
+        nodes = [arxivNode nodesForXPathExpression:arxivIDNodePath];
         if (1 == [nodes count]) {
             node = [nodes firstObject];
             if ((string = [node stringValue])) {
@@ -193,7 +205,7 @@
             arxivNode = arxivSearchResult;
         
         // search for PDF
-        nodes = [arxivNode nodesForXPath:pdfURLNodePath];
+        nodes = [arxivNode nodesForXPathExpression:pdfURLNodePath];
         if (1 == [nodes count]) {
             // successfully found the result PDF url
             if ((string = [[nodes firstObject] getAttribute:@"href"])) {
@@ -207,7 +219,7 @@
         }
         
         // search for DOI
-        nodes = [arxivNode nodesForXPath:doiNodePath];
+        nodes = [arxivNode nodesForXPathExpression:doiNodePath];
         if (1 == [nodes count]) {
             // successfully found the result PDF url
             if ((string = [[nodes firstObject] stringValue])) {
@@ -215,14 +227,14 @@
             }
         }
         
-        if (isList) {
-            arxivNode = [[arxivSearchResult 
nodesForXPath:@"./following-sibling::dd/div[@class='meta']"] firstObject];
+        if (arxivNodePath) {
+            arxivNode = [[arxivSearchResult 
nodesForXPathExpression:arxivNodePath] firstObject];
         } else {
             arxivNode = arxivSearchResult;
         }
         
         // search for title
-        nodes = [arxivNode nodesForXPath:titleNodePath];
+        nodes = [arxivNode nodesForXPathExpression:titleNodePath];
         if (1 == [nodes count]) {
             if ((string = [[nodes firstObject] stringValue])) {
                 [pubFields setValue:string forKey:BDSKTitleString];
@@ -230,7 +242,7 @@
         }
         
         // search for authors
-        nodes = [arxivNode nodesForXPath:authorsNodePath];
+        nodes = [arxivNode nodesForXPathExpression:authorsNodePath];
         if (0 < [nodes count]) {
             if ((string = [[nodes valueForKeyPath:@"stringValue"] 
componentsJoinedByString:@" and "])) {
                 [pubFields setValue:string forKey:BDSKAuthorString];
@@ -238,7 +250,7 @@
         }
         
         // search for journal ref
-        nodes = [arxivNode nodesForXPath:journalNodePath];
+        nodes = [arxivNode nodesForXPathExpression:journalNodePath];
         if (1 == [nodes count]) {
             if ((string = [[nodes firstObject] stringValue])) {
                 // try to get full journal ref components, as "Journal Volume 
(Year) Pages"
@@ -268,7 +280,7 @@
         }
         
         // search for abstract
-        nodes = [arxivNode nodesForXPath:abstractNodePath];
+        nodes = [arxivNode nodesForXPathExpression:abstractNodePath];
         if (1 == [nodes count]) {
             if ((string = [[nodes firstObject] stringValuePreservingBreaks])) {
                 if (isAbstract && [string hasPrefix:@"Abstract: "])

Modified: trunk/bibdesk/BDSKIACRParser.m
===================================================================
--- trunk/bibdesk/BDSKIACRParser.m      2018-08-22 14:19:07 UTC (rev 22509)
+++ trunk/bibdesk/BDSKIACRParser.m      2018-08-22 21:59:05 UTC (rev 22510)
@@ -81,6 +81,27 @@
     else
         sources = [NSArray arrayWithObjects:rootElement, nil];
        
+    if ([sources count] == 0)
+        return nil;
+    
+    DOMXPathExpression *titleNodePath = nil;
+    DOMXPathExpression *authorNodePath = nil;
+    DOMXPathExpression *pathToSearchNodePath = nil;
+
+    @try {
+        if (isSearch) {
+            titleNodePath = [[self domDocument] 
createExpression:@"following-sibling::dd/b" resolver:nil];
+            authorNodePath = [[self domDocument] 
createExpression:@"following-sibling::dd[position()=2]/em" resolver:nil];
+            pathToSearchNodePath = [[self domDocument] 
createExpression:@".//a/@href" resolver:nil];
+        } else {
+            titleNodePath = [[self domDocument] createExpression:@".//b" 
resolver:nil];
+            authorNodePath = [[self domDocument] createExpression:@".//i" 
resolver:nil];
+        }
+    }
+    @catch (id e) {
+        NSLog(@"Ignoring XPath exception: %@", e);
+    }
+    
     for (DOMNode *node in sources) {
                
                NSMutableDictionary *pubFields = [NSMutableDictionary 
dictionary];
@@ -89,29 +110,20 @@
         NSArray *nodes;
         NSString *string;
         
-        if (isSearch) {
-            // set title
-            nodes = [node nodesForXPath:@"following-sibling::dd/b"];
+        // set title
+        nodes = [node nodesForXPathExpression:titleNodePath];
+        if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+            [pubFields setObject:string forKey:BDSKTitleString];
+        // set authors
+        nodes = [node nodesForXPathExpression:authorNodePath];
+        if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+            [pubFields setObject:string forKey:BDSKAuthorString];
+        // to get year and report number
+        if (pathToSearchNodePath) {
+            nodes = [node nodesForXPathExpression:pathToSearchNodePath];
             if ([nodes count] && (string = [[nodes firstObject] stringValue]))
-                [pubFields setObject:string forKey:BDSKTitleString];
-            nodes = [node 
nodesForXPath:@"following-sibling::dd[position()=2]/em"];
-            // set authors
-            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
-                [pubFields setObject:string forKey:BDSKAuthorString];
-            nodes = [node nodesForXPath:@".//a/@href"];
-            // to get year and report number
-            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
                 pathToSearch = string;
         } else {
-            // set title
-            nodes = [node nodesForXPath:@".//b"];
-            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
-                [pubFields setObject:string forKey:BDSKTitleString];
-            // set authors
-            nodes = [node nodesForXPath:@".//i"];
-            if ([nodes count] && (string = [[nodes firstObject] stringValue]))
-                [pubFields setObject:string forKey:BDSKAuthorString];
-            // to get year and report number
             pathToSearch = [url path];
         }
         

Modified: trunk/bibdesk/DOMNode_BDSKExtensions.h
===================================================================
--- trunk/bibdesk/DOMNode_BDSKExtensions.h      2018-08-22 14:19:07 UTC (rev 
22509)
+++ trunk/bibdesk/DOMNode_BDSKExtensions.h      2018-08-22 21:59:05 UTC (rev 
22510)
@@ -42,6 +42,7 @@
 @interface DOMNode (BDSKExtensions)
 
 - (NSArray *)nodesForXPath:(NSString *)xpath;
+- (NSArray *)nodesForXPathExpression:(DOMXPathExpression *)xpathExpression;
 
 - (NSString *)stringValue;
 - (NSString *)stringValuePreservingBreaks;

Modified: trunk/bibdesk/DOMNode_BDSKExtensions.m
===================================================================
--- trunk/bibdesk/DOMNode_BDSKExtensions.m      2018-08-22 14:19:07 UTC (rev 
22509)
+++ trunk/bibdesk/DOMNode_BDSKExtensions.m      2018-08-22 21:59:05 UTC (rev 
22510)
@@ -42,7 +42,32 @@
 @implementation DOMNode (BDSKExtensions)
 
 - (NSArray *)nodesForXPath:(NSString *)xpath {
-    DOMXPathResult *result = [[self ownerDocument] evaluate:xpath 
contextNode:self resolver:nil type:DOM_ANY_TYPE inResult:nil];
+    DOMXPathResult *result = nil;
+    DOMNode *node = nil;
+    NSMutableArray *nodes = nil;
+    
+    @try {
+        result = [[self ownerDocument] evaluate:xpath contextNode:self 
resolver:nil type:DOM_ORDERED_NODE_ITERATOR_TYPE inResult:nil];
+    }
+    @catch (id e) {
+        NSLog(@"Ignoring XPath exception: %@", e);
+    }
+    
+    if ((node = [result iterateNext])) {
+        nodes = [NSMutableArray array];
+        do {
+            [nodes addObject:node];
+        } while ((node = [result iterateNext]));
+    }
+    
+    return nodes;
+}
+
+- (NSArray *)nodesForXPathExpression:(DOMXPathExpression *)xpathExpression {
+    if (xpathExpression == nil)
+        return nil;
+    
+    DOMXPathResult *result = [xpathExpression evaluate:self 
type:DOM_ORDERED_NODE_ITERATOR_TYPE inResult:nil];
     DOMNode *node = [result iterateNext];
     NSMutableArray *nodes = nil;
     

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit

Reply via email to