Revision: 22521
http://sourceforge.net/p/bibdesk/svn/22521
Author: hofman
Date: 2018-08-24 09:26:45 +0000 (Fri, 24 Aug 2018)
Log Message:
-----------
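include the search for the DOI meta tag names (citation_doi, doi, prism.doi, dc.identifier) in the xpath expression instead of filtering the nodes afterwards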
Modified Paths:
--------------
trunk/bibdesk/BDSKDOIWebParser.m
Modified: trunk/bibdesk/BDSKDOIWebParser.m
===================================================================
--- trunk/bibdesk/BDSKDOIWebParser.m 2018-08-24 09:08:53 UTC (rev 22520)
+++ trunk/bibdesk/BDSKDOIWebParser.m 2018-08-24 09:26:45 UTC (rev 22521)
@@ -49,23 +49,15 @@
DOMNode *rootElement = [domDocument documentElement];
NSString *doiXPath;
NSArray *nodes;
- DOMNode *node;
- doiXPath =
@"./head/meta[starts-with(translate(@content,'DOI','doi'),'doi:') or
starts-with(@content,'10.')]";
- nodes = [rootElement nodesForXPath:doiXPath];
+ doiXPath = @"./head/meta[contains(' citation_doi doi prism.doi
dc.identifier ',concat(' ',translate(@name,'ACDEFIMNOPRST','acdefimnoprst'),'
')) and (starts-with(translate(@content,'DOI','doi'),'doi:') or
starts-with(@content,'10.'))]";
- if ([nodes count] > 0) {
- NSSet *names = [NSSet setWithObjects:@"citation_doi", @"doi",
@"prism.doi", @"dc.identifier", nil];
- for (node in nodes) {
- if ([names containsObject:[[node nodeName] lowercaseString]])
- return YES;
- }
- }
+ if (nil != [rootElement singleNodeForXPath:doiXPath])
+ return YES;
doiXPath = @"./body//a[starts-with(@href,'https://doi.org/') or
starts-with(@href,'http://dx.doi.org/')]";
- node = [rootElement singleNodeForXPath:doiXPath];
- if (node != nil)
+ if (nil != [rootElement singleNodeForXPath:doiXPath])
return YES;
NSString *text = [[domDocument body] textContent];
@@ -80,27 +72,22 @@
DOMNode *rootElement = [[self domDocument] documentElement];
NSString *doiXPath;
NSArray *nodes = nil;
+ DOMNode *node;
AGRegex *doiRegex = [AGRegex
regexWithPattern:@"^(doi:|https?://(dx\\.)?doi\\.org/)?(10\\.[0-9]{4,}(\\.[0-9]+)*/\\S+)$"
options:AGRegexCaseInsensitive];
AGRegexMatch *match;
NSString *doi;
- doiXPath =
@"./head/meta[starts-with(translate(@content,'DOI','doi'),'doi:') or
starts-with(@content,'10.')]";
- nodes = [rootElement nodesForXPath:doiXPath];
+ doiXPath = @"./head/meta[contains(' citation_doi doi prism.doi
dc.identifier ',concat(' ',translate(@name,'ACDEFIMNOPRST','acdefimnoprst'),'
')) and (starts-with(translate(@content,'DOI','doi'),'doi:') or
starts-with(@content,'10.'))]";
+ node = [rootElement singleNodeForXPath:doiXPath];
- if ([nodes count] > 0) {
- NSSet *names = [NSSet setWithObjects:@"citation_doi", @"doi",
@"prism.doi", @"dc.identifier", nil];
- for (DOMElement *node in nodes) {
- if ([names containsObject:[[node nodeName] lowercaseString]]) {
- doi = [node getAttribute:@"content"];
- if (doi && (match = [doiRegex findInString:doi])) {
- doi = [match groupAtIndex:3];
- if ([[match groupAtIndex:1] hasPrefix:@"http"] == NO)
- doi = [doi stringByAddingPercentEscapes];
- [dois addObject:doi];
- }
- break;
- }
+ if (node) {
+ doi = [(DOMElement *)node getAttribute:@"content"];
+ if (doi && (match = [doiRegex findInString:doi])) {
+ doi = [match groupAtIndex:3];
+ if ([[match groupAtIndex:1] hasPrefix:@"http"] == NO)
+ doi = [doi stringByAddingPercentEscapes];
+ [dois addObject:doi];
}
}
@@ -108,8 +95,8 @@
doiXPath = @"./body//a[starts-with(@href,'https://doi.org/') or
starts-with(@href,'http://dx.doi.org/')]";
nodes = [rootElement nodesForXPath:doiXPath];
- for (DOMElement *node in nodes) {
- doi = [node getAttribute:@"href"];
+ for (node in nodes) {
+ doi = [(DOMElement *)node getAttribute:@"href"];
if (doi && (match = [doiRegex findInString:doi])) {
doi = [[match groupAtIndex:3]
stringByTrimmingCharactersInSet:[NSCharacterSet punctuationCharacterSet]];
if ([dois containsObject:doi] == NO)
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit