Revision: 22510
http://sourceforge.net/p/bibdesk/svn/22510
Author: hofman
Date: 2018-08-22 21:59:05 +0000 (Wed, 22 Aug 2018)
Log Message:
-----------
Catch exceptions from XPath parsing. Add a reusable method that returns the
nodes obtained by evaluating a precompiled XPath expression.
Modified Paths:
--------------
trunk/bibdesk/BDSKArxivParser.m
trunk/bibdesk/BDSKIACRParser.m
trunk/bibdesk/DOMNode_BDSKExtensions.h
trunk/bibdesk/DOMNode_BDSKExtensions.m
Modified: trunk/bibdesk/BDSKArxivParser.m
===================================================================
--- trunk/bibdesk/BDSKArxivParser.m 2018-08-22 14:19:07 UTC (rev 22509)
+++ trunk/bibdesk/BDSKArxivParser.m 2018-08-22 21:59:05 UTC (rev 22510)
@@ -75,59 +75,77 @@
BOOL isAbstract = [url hasFirstPathComponent:@"abs"];
BOOL isSearch = [url hasFirstPathComponent:@"search"];
BOOL isList = [url hasFirstPathComponent:@"list"];
+
+ NSString *arxivSearchResultNodePath = nil;
- NSString *arxivSearchResultNodePath = nil;
- NSString *arxivLinkNodePath;
- NSString *arxivIDNodePath;
- NSString *pdfURLNodePath;
- NSString *doiNodePath;
- NSString *titleNodePath;
- NSString *authorsNodePath;
- NSString *journalNodePath;
- NSString *abstractNodePath;
-
- if (isAbstract) {
+ if (isAbstract)
arxivSearchResultNodePath =
@"./body/div[@id='content']/div[@id='abs']/div[@class='leftcolumn']";
-
- arxivLinkNodePath =
@"./div[@class='metatable']/table//td[@class='tablecell arxivid']";
- arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
-
- pdfURLNodePath =
@"../div[@class='extra-services']/div[@class='full-text']/ul/li/a[contains(text(),'PDF')]";
- doiNodePath = @"./div[@class='metatable']/table//td[@class='tablecell
doi']/a";
-
- titleNodePath = @"./h1[contains(concat(' ',normalize-space(@class),'
'),' title ')]/span/following-sibling::text()";
- authorsNodePath = @"./div[@class='authors']/a";
- journalNodePath =
@"./div[@class='metatable']/table//td[@class='tablecell jref']";
- abstractNodePath = @"./blockquote[contains(concat('
',normalize-space(@class),' '),' abstract ')]";
- } else if (isSearch) {
+ else if (isSearch)
arxivSearchResultNodePath =
@"./body//ol[@class='breathe-horizontal']/li[@class='arxiv-result']";
-
- arxivLinkNodePath = @"./div[contains(concat('
',normalize-space(@class),' '),' level ')]/p[contains(concat('
',normalize-space(@class),' '),' list-title ')]";
- arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
-
- pdfURLNodePath = @"./span/a[contains(text(),'pdf')]";
- doiNodePath =
@"../div/div/div/span[contains(text(),'doi')]/following-sibling::span/a";
-
- titleNodePath = @"./p[contains(concat(' ',normalize-space(@class),'
'),' title ')]";
- authorsNodePath = @"./p[@class='authors']/a";
- journalNodePath = @"./p[contains(concat(' ',normalize-space(@class),'
'),' comments ')]/span[text()='Journal ref:']/following-sibling::text()";
- abstractNodePath = @"./p[contains(concat(' ',normalize-space(@class),'
'),' abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),'
abstract-full ')]";
- } else if (isList) {
+ else if (isList)
arxivSearchResultNodePath = @"./body/div[@id='content']/div/dl/dt";
+ else
+ return nil;
- arxivLinkNodePath = @"./span[@class='list-identifier']";
- arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
-
- pdfURLNodePath = @"./a[contains(text(),'pdf')]";
- doiNodePath =
@"./div[@class='list-doi']/span/following-sibling::text()";
-
- titleNodePath = @"./div[contains(concat(' ',normalize-space(@class),'
'),' list-title ')]/span/following-sibling::text()";
- authorsNodePath = @"./div[@class='list-authors']/a";
- journalNodePath =
@"./div[@class='list-journal-ref']/span/following-sibling::text()";
- abstractNodePath = @"./p";
- } else {
- return nil;
+ // fetch the arxiv search results
+ NSArray *arxivSearchResults = [[[self domDocument] documentElement]
nodesForXPath:arxivSearchResultNodePath];
+
+ if ([arxivSearchResults count] == 0)
+ return nil;
+
+ DOMXPathExpression *arxivLinkNodePath = nil;
+ DOMXPathExpression *arxivIDNodePath = nil;
+ DOMXPathExpression *pdfURLNodePath = nil;
+ DOMXPathExpression *doiNodePath = nil;
+ DOMXPathExpression *arxivNodePath = nil;
+ DOMXPathExpression *titleNodePath = nil;
+ DOMXPathExpression *authorsNodePath = nil;
+ DOMXPathExpression *journalNodePath = nil;
+ DOMXPathExpression *abstractNodePath = nil;
+
+ @try {
+ if (isAbstract) {
+ arxivLinkNodePath = [[self domDocument]
createExpression:@"./div[@class='metatable']/table//td[@class='tablecell
arxivid']" resolver:nil];
+ arxivIDNodePath = [[self domDocument]
createExpression:@"./a[contains(text(),'arXiv:')]" resolver:nil];
+
+ pdfURLNodePath = [[self domDocument]
createExpression:@"../div[@class='extra-services']/div[@class='full-text']/ul/li/a[contains(text(),'PDF')]"
resolver:nil];
+ doiNodePath = [[self domDocument]
createExpression:@"./div[@class='metatable']/table//td[@class='tablecell
doi']/a" resolver:nil];
+
+ titleNodePath = [[self domDocument]
createExpression:@"./h1[contains(concat(' ',normalize-space(@class),' '),'
title ')]/span/following-sibling::text()" resolver:nil];
+ authorsNodePath = [[self domDocument]
createExpression:@"./div[@class='authors']/a" resolver:nil];
+ journalNodePath = [[self domDocument]
createExpression:@"./div[@class='metatable']/table//td[@class='tablecell
jref']" resolver:nil];
+ abstractNodePath = [[self domDocument]
createExpression:@"./blockquote[contains(concat(' ',normalize-space(@class),'
'),' abstract ')]" resolver:nil];
+ } else if (isSearch) {
+ arxivLinkNodePath = [[self domDocument]
createExpression:@"./div[contains(concat(' ',normalize-space(@class),' '),'
level ')]/p[contains(concat(' ',normalize-space(@class),' '),' list-title ')]"
resolver:nil];
+ arxivIDNodePath = [[self domDocument]
createExpression:@"./a[contains(text(),'arXiv:')]" resolver:nil];
+
+ pdfURLNodePath = [[self domDocument]
createExpression:@"./span/a[contains(text(),'pdf')]" resolver:nil];
+ doiNodePath = [[self domDocument]
createExpression:@"../div/div/div/span[contains(text(),'doi')]/following-sibling::span/a"
resolver:nil];
+
+ titleNodePath = [[self domDocument]
createExpression:@"./p[contains(concat(' ',normalize-space(@class),' '),' title
')]" resolver:nil];
+ authorsNodePath = [[self domDocument]
createExpression:@"./p[@class='authors']/a" resolver:nil];
+ journalNodePath = [[self domDocument]
createExpression:@"./p[contains(concat(' ',normalize-space(@class),' '),'
comments ')]/span[text()='Journal ref:']/following-sibling::text()"
resolver:nil];
+ abstractNodePath = [[self domDocument]
createExpression:@"./p[contains(concat(' ',normalize-space(@class),' '),'
abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),'
abstract-full ')]" resolver:nil];
+ } else if (isList) {
+ arxivLinkNodePath = [[self domDocument]
createExpression:@"./span[@class='list-identifier']" resolver:nil];
+ arxivIDNodePath = [[self domDocument]
createExpression:@"./a[contains(text(),'arXiv:')]" resolver:nil];
+
+ pdfURLNodePath = [[self domDocument]
createExpression:@"./a[contains(text(),'pdf')]" resolver:nil];
+ doiNodePath = [[self domDocument]
createExpression:@"./div[@class='list-doi']/span/following-sibling::text()"
resolver:nil];
+
+ arxivNodePath = [[self domDocument]
createExpression:@"./following-sibling::dd/div[@class='meta']" resolver:nil];
+
+ titleNodePath = [[self domDocument]
createExpression:@"./div[contains(concat(' ',normalize-space(@class),' '),'
list-title ')]/span/following-sibling::text()" resolver:nil];
+ authorsNodePath = [[self domDocument]
createExpression:@"./div[@class='list-authors']/a" resolver:nil];
+ journalNodePath = [[self domDocument]
createExpression:@"./div[@class='list-journal-ref']/span/following-sibling::text()"
resolver:nil];
+ abstractNodePath = [[self domDocument] createExpression:@"./p"
resolver:nil];
+ } else {
+ return nil;
+ }
}
+ @catch (id e) {
+ NSLog(@"Ignoring XPath exception: %@", e);
+ }
AGRegex *eprintRegex1 = [AGRegex
regexWithPattern:@"([0-9]{2})([0-9]{2})\\.([0-9]{4})"
options:AGRegexMultiline];
@@ -141,12 +159,6 @@
AGRegex *journalRegex3 = [AGRegex
regexWithPattern:@"(.+[^0-9])([0-9]+):(.*),([0-9]{4})"
options:AGRegexMultiline];
- // fetch the arxiv search results
- NSArray *arxivSearchResults = nil;
- DOMElement *rootElement = [[self domDocument] documentElement];
-
- arxivSearchResults = [rootElement nodesForXPath:arxivSearchResultNodePath];
-
BOOL addLinkedFiles = NO == [[NSUserDefaults standardUserDefaults]
boolForKey:BDSKUseLocalUrlAndUrlKey];
NSMutableArray *items = [NSMutableArray arrayWithCapacity:0];
@@ -154,7 +166,7 @@
for (DOMNode *arxivSearchResult in arxivSearchResults) {
// fetch the arxiv links
- NSArray *nodes = [arxivSearchResult nodesForXPath:arxivLinkNodePath];
+ NSArray *nodes = [arxivSearchResult
nodesForXPathExpression:arxivLinkNodePath];
if (1 != [nodes count]) {
// If arXiv ever start providing multiple alternative bibtex links
for a
@@ -171,7 +183,7 @@
NSString *string = nil;
// search for arXiv ID
- nodes = [arxivNode nodesForXPath:arxivIDNodePath];
+ nodes = [arxivNode nodesForXPathExpression:arxivIDNodePath];
if (1 == [nodes count]) {
node = [nodes firstObject];
if ((string = [node stringValue])) {
@@ -193,7 +205,7 @@
arxivNode = arxivSearchResult;
// search for PDF
- nodes = [arxivNode nodesForXPath:pdfURLNodePath];
+ nodes = [arxivNode nodesForXPathExpression:pdfURLNodePath];
if (1 == [nodes count]) {
// successfully found the result PDF url
if ((string = [[nodes firstObject] getAttribute:@"href"])) {
@@ -207,7 +219,7 @@
}
// search for DOI
- nodes = [arxivNode nodesForXPath:doiNodePath];
+ nodes = [arxivNode nodesForXPathExpression:doiNodePath];
if (1 == [nodes count]) {
// successfully found the result PDF url
if ((string = [[nodes firstObject] stringValue])) {
@@ -215,14 +227,14 @@
}
}
- if (isList) {
- arxivNode = [[arxivSearchResult
nodesForXPath:@"./following-sibling::dd/div[@class='meta']"] firstObject];
+ if (arxivNodePath) {
+ arxivNode = [[arxivSearchResult
nodesForXPathExpression:arxivNodePath] firstObject];
} else {
arxivNode = arxivSearchResult;
}
// search for title
- nodes = [arxivNode nodesForXPath:titleNodePath];
+ nodes = [arxivNode nodesForXPathExpression:titleNodePath];
if (1 == [nodes count]) {
if ((string = [[nodes firstObject] stringValue])) {
[pubFields setValue:string forKey:BDSKTitleString];
@@ -230,7 +242,7 @@
}
// search for authors
- nodes = [arxivNode nodesForXPath:authorsNodePath];
+ nodes = [arxivNode nodesForXPathExpression:authorsNodePath];
if (0 < [nodes count]) {
if ((string = [[nodes valueForKeyPath:@"stringValue"]
componentsJoinedByString:@" and "])) {
[pubFields setValue:string forKey:BDSKAuthorString];
@@ -238,7 +250,7 @@
}
// search for journal ref
- nodes = [arxivNode nodesForXPath:journalNodePath];
+ nodes = [arxivNode nodesForXPathExpression:journalNodePath];
if (1 == [nodes count]) {
if ((string = [[nodes firstObject] stringValue])) {
// try to get full journal ref components, as "Journal Volume
(Year) Pages"
@@ -268,7 +280,7 @@
}
// search for abstract
- nodes = [arxivNode nodesForXPath:abstractNodePath];
+ nodes = [arxivNode nodesForXPathExpression:abstractNodePath];
if (1 == [nodes count]) {
if ((string = [[nodes firstObject] stringValuePreservingBreaks])) {
if (isAbstract && [string hasPrefix:@"Abstract: "])
Modified: trunk/bibdesk/BDSKIACRParser.m
===================================================================
--- trunk/bibdesk/BDSKIACRParser.m 2018-08-22 14:19:07 UTC (rev 22509)
+++ trunk/bibdesk/BDSKIACRParser.m 2018-08-22 21:59:05 UTC (rev 22510)
@@ -81,6 +81,27 @@
else
sources = [NSArray arrayWithObjects:rootElement, nil];
+ if ([sources count] == 0)
+ return nil;
+
+ DOMXPathExpression *titleNodePath = nil;
+ DOMXPathExpression *authorNodePath = nil;
+ DOMXPathExpression *pathToSearchNodePath = nil;
+
+ @try {
+ if (isSearch) {
+ titleNodePath = [[self domDocument]
createExpression:@"following-sibling::dd/b" resolver:nil];
+ authorNodePath = [[self domDocument]
createExpression:@"following-sibling::dd[position()=2]/em" resolver:nil];
+ pathToSearchNodePath = [[self domDocument]
createExpression:@".//a/@href" resolver:nil];
+ } else {
+ titleNodePath = [[self domDocument] createExpression:@".//b"
resolver:nil];
+ authorNodePath = [[self domDocument] createExpression:@".//i"
resolver:nil];
+ }
+ }
+ @catch (id e) {
+ NSLog(@"Ignoring XPath exception: %@", e);
+ }
+
for (DOMNode *node in sources) {
NSMutableDictionary *pubFields = [NSMutableDictionary
dictionary];
@@ -89,29 +110,20 @@
NSArray *nodes;
NSString *string;
- if (isSearch) {
- // set title
- nodes = [node nodesForXPath:@"following-sibling::dd/b"];
+ // set title
+ nodes = [node nodesForXPathExpression:titleNodePath];
+ if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+ [pubFields setObject:string forKey:BDSKTitleString];
+ // set authors
+ nodes = [node nodesForXPathExpression:authorNodePath];
+ if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+ [pubFields setObject:string forKey:BDSKAuthorString];
+ // to get year and report number
+ if (pathToSearchNodePath) {
+ nodes = [node nodesForXPathExpression:pathToSearchNodePath];
if ([nodes count] && (string = [[nodes firstObject] stringValue]))
- [pubFields setObject:string forKey:BDSKTitleString];
- nodes = [node
nodesForXPath:@"following-sibling::dd[position()=2]/em"];
- // set authors
- if ([nodes count] && (string = [[nodes firstObject] stringValue]))
- [pubFields setObject:string forKey:BDSKAuthorString];
- nodes = [node nodesForXPath:@".//a/@href"];
- // to get year and report number
- if ([nodes count] && (string = [[nodes firstObject] stringValue]))
pathToSearch = string;
} else {
- // set title
- nodes = [node nodesForXPath:@".//b"];
- if ([nodes count] && (string = [[nodes firstObject] stringValue]))
- [pubFields setObject:string forKey:BDSKTitleString];
- // set authors
- nodes = [node nodesForXPath:@".//i"];
- if ([nodes count] && (string = [[nodes firstObject] stringValue]))
- [pubFields setObject:string forKey:BDSKAuthorString];
- // to get year and report number
pathToSearch = [url path];
}
Modified: trunk/bibdesk/DOMNode_BDSKExtensions.h
===================================================================
--- trunk/bibdesk/DOMNode_BDSKExtensions.h 2018-08-22 14:19:07 UTC (rev
22509)
+++ trunk/bibdesk/DOMNode_BDSKExtensions.h 2018-08-22 21:59:05 UTC (rev
22510)
@@ -42,6 +42,7 @@
@interface DOMNode (BDSKExtensions)
- (NSArray *)nodesForXPath:(NSString *)xpath;
+- (NSArray *)nodesForXPathExpression:(DOMXPathExpression *)xpathExpression;
- (NSString *)stringValue;
- (NSString *)stringValuePreservingBreaks;
Modified: trunk/bibdesk/DOMNode_BDSKExtensions.m
===================================================================
--- trunk/bibdesk/DOMNode_BDSKExtensions.m 2018-08-22 14:19:07 UTC (rev
22509)
+++ trunk/bibdesk/DOMNode_BDSKExtensions.m 2018-08-22 21:59:05 UTC (rev
22510)
@@ -42,7 +42,32 @@
@implementation DOMNode (BDSKExtensions)
- (NSArray *)nodesForXPath:(NSString *)xpath {
- DOMXPathResult *result = [[self ownerDocument] evaluate:xpath
contextNode:self resolver:nil type:DOM_ANY_TYPE inResult:nil];
+ DOMXPathResult *result = nil;
+ DOMNode *node = nil;
+ NSMutableArray *nodes = nil;
+
+ @try {
+ result = [[self ownerDocument] evaluate:xpath contextNode:self
resolver:nil type:DOM_ORDERED_NODE_ITERATOR_TYPE inResult:nil];
+ }
+ @catch (id e) {
+ NSLog(@"Ignoring XPath exception: %@", e);
+ }
+
+ if ((node = [result iterateNext])) {
+ nodes = [NSMutableArray array];
+ do {
+ [nodes addObject:node];
+ } while ((node = [result iterateNext]));
+ }
+
+ return nodes;
+}
+
+- (NSArray *)nodesForXPathExpression:(DOMXPathExpression *)xpathExpression {
+ if (xpathExpression == nil)
+ return nil;
+
+ DOMXPathResult *result = [xpathExpression evaluate:self
type:DOM_ORDERED_NODE_ITERATOR_TYPE inResult:nil];
DOMNode *node = [result iterateNext];
NSMutableArray *nodes = nil;
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit