Revision: 22509
http://sourceforge.net/p/bibdesk/svn/22509
Author: hofman
Date: 2018-08-22 14:19:07 +0000 (Wed, 22 Aug 2018)
Log Message:
-----------
Replace use of the NSXML library with the WebKit DOM library in web parsers.
NSXML is buggy: it seems to choke on certain content (comments?) and drops empty
elements. Also, we no longer need to create a separate XML document. Some XPath
logic had to change because DOM only supports XPath version 1, not version 2.
Modified Paths:
--------------
trunk/bibdesk/BDSKACMDLParser.m
trunk/bibdesk/BDSKArxivParser.m
trunk/bibdesk/BDSKAsynchronousWebParser.h
trunk/bibdesk/BDSKAsynchronousWebParser.m
trunk/bibdesk/BDSKBibTeXWebParser.m
trunk/bibdesk/BDSKCOinSParser.m
trunk/bibdesk/BDSKCiteULikeParser.m
trunk/bibdesk/BDSKDOIWebParser.m
trunk/bibdesk/BDSKGoogleScholarParser.m
trunk/bibdesk/BDSKHCiteParser.m
trunk/bibdesk/BDSKHubmedParser.m
trunk/bibdesk/BDSKIACRParser.m
trunk/bibdesk/BDSKIEEEXploreParser.m
trunk/bibdesk/BDSKIUCrParser.m
trunk/bibdesk/BDSKInspireParser.m
trunk/bibdesk/BDSKJSTORWebParser.m
trunk/bibdesk/BDSKMASParser.m
trunk/bibdesk/BDSKMathSciNetParser.m
trunk/bibdesk/BDSKNumdamParser.m
trunk/bibdesk/BDSKProjectEuclidParser.m
trunk/bibdesk/BDSKSIAMParser.m
trunk/bibdesk/BDSKScienceDirectParser.m
trunk/bibdesk/BDSKSpringerParser.m
trunk/bibdesk/BDSKWebParser.h
trunk/bibdesk/BDSKWebParser.m
trunk/bibdesk/BDSKZentralblattParser.m
trunk/bibdesk/DOMNode_BDSKExtensions.h
trunk/bibdesk/DOMNode_BDSKExtensions.m
trunk/bibdesk/NSString_BDSKExtensions.h
trunk/bibdesk/NSString_BDSKExtensions.m
Modified: trunk/bibdesk/BDSKACMDLParser.m
===================================================================
--- trunk/bibdesk/BDSKACMDLParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKACMDLParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,6 @@
*/
#import "BDSKACMDLParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -45,8 +44,8 @@
+ (NSString *)citationNodeXPath { return
@"./head/meta[@name='citation_abstract_html_url']/@content"; }
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
- NSString *nodeString = [[NSURL URLWithString:[node
stringValueOfAttribute:@"content"]] query];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+ NSString *nodeString = [[NSURL URLWithString:[(DOMElement *)node
getAttribute:@"content"]] query];
AGRegex *doiRegex = [AGRegex regexWithPattern:@"^id=([0-9]+)\\.([0-9]+)$"];
AGRegexMatch *match = [doiRegex findInString:nodeString];
Modified: trunk/bibdesk/BDSKArxivParser.m
===================================================================
--- trunk/bibdesk/BDSKArxivParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKArxivParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,7 @@
#import "BDSKArxivParser.h"
#import "BibItem.h"
#import "BDSKLinkedFile.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSURL_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -46,7 +46,7 @@
@implementation BDSKArxivParser
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
// !!! other countries end up with e.g. fr.arxiv.org; checking for
scholar.arxiv.com may fail in those cases
if ([url hasDomain:@"arxiv.org"] == NO)
@@ -63,8 +63,7 @@
else
return NO;
- NSError *error = nil;
- NSInteger nodecount = [[[xmlDocument rootElement]
nodesForXPath:containsArxivLinkNode error:&error] count];
+ NSInteger nodecount = [[[domDocument documentElement]
nodesForXPath:containsArxivLinkNode] count];
return nodecount > 0;
}
@@ -96,10 +95,10 @@
pdfURLNodePath =
@"../div[@class='extra-services']/div[@class='full-text']/ul/li/a[contains(text(),'PDF')]";
doiNodePath = @"./div[@class='metatable']/table//td[@class='tablecell
doi']/a";
- titleNodePath = @"./h1[contains(concat(' ',normalize-space(@class),'
'),' title ')]/text()";
+ titleNodePath = @"./h1[contains(concat(' ',normalize-space(@class),'
'),' title ')]/span/following-sibling::text()";
authorsNodePath = @"./div[@class='authors']/a";
journalNodePath =
@"./div[@class='metatable']/table//td[@class='tablecell jref']";
- abstractNodePath = @"./blockquote[contains(concat('
',normalize-space(@class),' '),' abstract ')]/text()";
+ abstractNodePath = @"./blockquote[contains(concat('
',normalize-space(@class),' '),' abstract ')]";
} else if (isSearch) {
arxivSearchResultNodePath =
@"./body//ol[@class='breathe-horizontal']/li[@class='arxiv-result']";
@@ -112,7 +111,7 @@
titleNodePath = @"./p[contains(concat(' ',normalize-space(@class),'
'),' title ')]";
authorsNodePath = @"./p[@class='authors']/a";
journalNodePath = @"./p[contains(concat(' ',normalize-space(@class),'
'),' comments ')]/span[text()='Journal ref:']/following-sibling::text()";
- abstractNodePath = @"./p[contains(concat(' ',normalize-space(@class),'
'),' abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),'
abstract-full ')]/text()";
+ abstractNodePath = @"./p[contains(concat(' ',normalize-space(@class),'
'),' abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),'
abstract-full ')]";
} else if (isList) {
arxivSearchResultNodePath = @"./body/div[@id='content']/div/dl/dt";
@@ -120,11 +119,11 @@
arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
pdfURLNodePath = @"./a[contains(text(),'pdf')]";
- doiNodePath = @"./div[@class='list-doi']/text()";
+ doiNodePath =
@"./div[@class='list-doi']/span/following-sibling::text()";
- titleNodePath = @"./div[contains(concat(' ',normalize-space(@class),'
'),' list-title ')]/text()";
+ titleNodePath = @"./div[contains(concat(' ',normalize-space(@class),'
'),' list-title ')]/span/following-sibling::text()";
authorsNodePath = @"./div[@class='list-authors']/a";
- journalNodePath = @"./div[@class='list-journal-ref']/text()";
+ journalNodePath =
@"./div[@class='list-journal-ref']/span/following-sibling::text()";
abstractNodePath = @"./p";
} else {
return nil;
@@ -144,20 +143,18 @@
// fetch the arxiv search results
NSArray *arxivSearchResults = nil;
- NSXMLElement *rootElement = [[self xmlDocument] rootElement];
+ DOMElement *rootElement = [[self domDocument] documentElement];
- arxivSearchResults = [rootElement nodesForXPath:arxivSearchResultNodePath
error:outError];
+ arxivSearchResults = [rootElement nodesForXPath:arxivSearchResultNodePath];
BOOL addLinkedFiles = NO == [[NSUserDefaults standardUserDefaults]
boolForKey:BDSKUseLocalUrlAndUrlKey];
NSMutableArray *items = [NSMutableArray arrayWithCapacity:0];
- for (NSXMLNode *arxivSearchResult in arxivSearchResults) {
+ for (DOMNode *arxivSearchResult in arxivSearchResults) {
// fetch the arxiv links
- NSError *error = nil;
- NSArray *nodes = [arxivSearchResult nodesForXPath:arxivLinkNodePath
- error:&error];
+ NSArray *nodes = [arxivSearchResult nodesForXPath:arxivLinkNodePath];
if (1 != [nodes count]) {
// If arXiv ever start providing multiple alternative bibtex links
for a
@@ -166,8 +163,8 @@
continue;
}
- NSXMLNode *arxivNode = [nodes objectAtIndex:0];
- NSXMLNode *node;
+ DOMNode *arxivNode = [nodes objectAtIndex:0];
+ DOMNode *node;
NSMutableDictionary *pubFields = [NSMutableDictionary dictionary];
NSMutableArray *pubFiles = [NSMutableArray array];
@@ -174,16 +171,15 @@
NSString *string = nil;
// search for arXiv ID
- nodes = [arxivNode nodesForXPath:arxivIDNodePath error:&error];
+ nodes = [arxivNode nodesForXPath:arxivIDNodePath];
if (1 == [nodes count]) {
node = [nodes firstObject];
if ((string = [node stringValue])) {
- string = [string
stringByRemovingSurroundingWhitespaceAndNewlines];
if ([string hasCaseInsensitivePrefix:@"arXiv:"])
string = [string substringFromIndex:6];
[pubFields setValue:string forKey:@"Eprint"];
}
- if ((string = [node stringValueOfAttribute:@"href"])) {
+ if ((string = [(DOMElement *)node getAttribute:@"href"])) {
// fix relative urls
if (NO == [string containsString:@"://"])
string = [[NSURL URLWithString:string relativeToURL:url]
absoluteString];
@@ -197,10 +193,10 @@
arxivNode = arxivSearchResult;
// search for PDF
- nodes = [arxivNode nodesForXPath:pdfURLNodePath error:&error];
+ nodes = [arxivNode nodesForXPath:pdfURLNodePath];
if (1 == [nodes count]) {
// successfully found the result PDF url
- if ((string = [[nodes firstObject]
stringValueOfAttribute:@"href"])) {
+ if ((string = [[nodes firstObject] getAttribute:@"href"])) {
// fix relative urls
if (NO == [string containsString:@"://"])
string = [[NSURL URLWithString:string relativeToURL:url]
absoluteString];
@@ -211,7 +207,7 @@
}
// search for DOI
- nodes = [arxivNode nodesForXPath:doiNodePath error:&error];
+ nodes = [arxivNode nodesForXPath:doiNodePath];
if (1 == [nodes count]) {
// successfully found the result PDF url
if ((string = [[nodes firstObject] stringValue])) {
@@ -219,33 +215,32 @@
}
}
- if (isList)
- arxivNode = [[[arxivSearchResult nextSibling] children]
firstObject];
- else
+ if (isList) {
+ arxivNode = [[arxivSearchResult
nodesForXPath:@"./following-sibling::dd/div[@class='meta']"] firstObject];
+ } else {
arxivNode = arxivSearchResult;
+ }
// search for title
- nodes = [arxivNode nodesForXPath:titleNodePath error:&error];
+ nodes = [arxivNode nodesForXPath:titleNodePath];
if (1 == [nodes count]) {
if ((string = [[nodes firstObject] stringValue])) {
- string = [string
stringByRemovingSurroundingWhitespaceAndNewlines];
[pubFields setValue:string forKey:BDSKTitleString];
}
}
// search for authors
- nodes = [arxivNode nodesForXPath:authorsNodePath error:&error];
+ nodes = [arxivNode nodesForXPath:authorsNodePath];
if (0 < [nodes count]) {
- if ((string = [[nodes
valueForKeyPath:@"stringValue.stringByRemovingSurroundingWhitespaceAndNewlines"]
componentsJoinedByString:@" and "])) {
+ if ((string = [[nodes valueForKeyPath:@"stringValue"]
componentsJoinedByString:@" and "])) {
[pubFields setValue:string forKey:BDSKAuthorString];
}
}
// search for journal ref
- nodes = [arxivNode nodesForXPath:journalNodePath error:&error];
+ nodes = [arxivNode nodesForXPath:journalNodePath];
if (1 == [nodes count]) {
if ((string = [[nodes firstObject] stringValue])) {
- string = [string
stringByRemovingSurroundingWhitespaceAndNewlines];
// try to get full journal ref components, as "Journal Volume
(Year) Pages"
AGRegexMatch *match = [journalRegex1 findInString:string];
if ([match groupAtIndex:0]) {
@@ -273,10 +268,13 @@
}
// search for abstract
- nodes = [arxivNode nodesForXPath:abstractNodePath error:&error];
+ nodes = [arxivNode nodesForXPath:abstractNodePath];
if (1 == [nodes count]) {
- if ((string = [[nodes firstObject] stringValue])) {
- string = [string
stringByRemovingSurroundingWhitespaceAndNewlines];
+ if ((string = [[nodes firstObject] stringValuePreservingBreaks])) {
+ if (isAbstract && [string hasPrefix:@"Abstract: "])
+ string = [string substringFromIndex:10];
+ if (isSearch && [string hasSuffix:@"\u25B3 Less"])
+ string = [[string substringToIndex:[string length] - 6]
stringByRemovingSurroundingWhitespaceAndNewlines];
[pubFields setValue:string forKey:BDSKAbstractString];
}
}
Modified: trunk/bibdesk/BDSKAsynchronousWebParser.h
===================================================================
--- trunk/bibdesk/BDSKAsynchronousWebParser.h 2018-08-22 06:30:25 UTC (rev
22508)
+++ trunk/bibdesk/BDSKAsynchronousWebParser.h 2018-08-22 14:19:07 UTC (rev
22509)
@@ -59,7 +59,7 @@
// Cet the URL string pointing to the bibTeX data from the node found by
citationNodeXPath
// Can be a relative URL, which is completed using the URL of the web page
// By default returns the href attribute of the node
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node;
++ (NSString *)citationURLStringFromNode:(DOMNode *)node;
// Subclasses can also override -itemsReturningError: adding downloads
themselves using the following
- (void)addDownloadWithRequest:(NSURLRequest *)request
contextInfo:(id)contextInfo;
Modified: trunk/bibdesk/BDSKAsynchronousWebParser.m
===================================================================
--- trunk/bibdesk/BDSKAsynchronousWebParser.m 2018-08-22 06:30:25 UTC (rev
22508)
+++ trunk/bibdesk/BDSKAsynchronousWebParser.m 2018-08-22 14:19:07 UTC (rev
22509)
@@ -40,7 +40,7 @@
#import "BibItem.h"
#import "BDSKBibTeXParser.h"
#import "NSError_BDSKExtensions.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSURL_BDSKExtensions.h"
@@ -62,14 +62,13 @@
return [self finishedStarting] && [downloads count] == 0;
}
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
NSString *host = [[[NSURL URLWithString:[self address]] host]
lowercaseString];
if ([url hasDomain:host] == NO)
return NO;
- NSError *error = nil;
- NSArray *nodes = [[xmlDocument rootElement] nodesForXPath:[self
citationNodeXPath] error:&error];
+ NSArray *nodes = [[domDocument documentElement] nodesForXPath:[self
citationNodeXPath]];
if ([nodes count] == 0)
return NO;
@@ -80,10 +79,10 @@
- (NSArray *)itemsReturningError:(NSError **)outError {
NSString *bibtexNodePath = [[self class] citationNodeXPath];
- NSArray *bibtexNodes = [[[self xmlDocument] rootElement]
nodesForXPath:bibtexNodePath error:outError];
+ NSArray *bibtexNodes = [[[self domDocument] documentElement]
nodesForXPath:bibtexNodePath];
NSMutableArray *bibtexURLStrings = [NSMutableArray array];
- for (NSXMLNode *bibtexNode in bibtexNodes) {
+ for (DOMNode *bibtexNode in bibtexNodes) {
NSString *bibtexURLString = [[self class]
citationURLStringFromNode:bibtexNode];
if (bibtexURLString != nil && [bibtexURLStrings
containsObject:bibtexURLString] == NO)
[bibtexURLStrings addObject:bibtexURLString];
@@ -106,8 +105,8 @@
+ (NSString *)citationNodeXPath { return @""; }
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
- return [node stringValueOfAttribute:@"href"];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+ return [(DOMElement *)node getAttribute:@"href"];
}
- (void)downloadDidFinish:(BDSKCitationDownload *)download {
Modified: trunk/bibdesk/BDSKBibTeXWebParser.m
===================================================================
--- trunk/bibdesk/BDSKBibTeXWebParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKBibTeXWebParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,7 @@
#import "BDSKBibTeXWebParser.h"
#import "BDSKBibTeXParser.h"
#import "BibItem.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSError_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -46,9 +46,9 @@
@implementation BDSKBibTeXWebParser
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
- NSString *text = [xmlDocument textStringValue];
+ NSString *text = [[domDocument body] textContent];
AGRegex *bibtexRegex = [AGRegex regexWithPattern:@"@[[:alpha:]]+[
\\t]*[{(]"];
return nil != [bibtexRegex findInString:text];
@@ -59,7 +59,7 @@
NSMutableArray *items = [NSMutableArray array];
- NSString *text = [[self xmlDocument] textStringValue];
+ NSString *text = [[[self domDocument] body] textContent];
AGRegex *bibtexRegex = [AGRegex regexWithPattern:@"@[[:alpha:]]+[
\\t]*[{(]"];
Modified: trunk/bibdesk/BDSKCOinSParser.m
===================================================================
--- trunk/bibdesk/BDSKCOinSParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKCOinSParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,7 @@
#import "BDSKCOinSParser.h"
#import <AGRegex/AGRegex.h>
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "BDSKLinkedFile.h"
#import "NSString_BDSKExtensions.h"
@@ -64,12 +64,12 @@
@implementation BDSKCOinSParser
+static NSString *hasCOinSNodesXPath = @"./body//span[@class='Z3988' and
string-length(@title)!=0]";
// Claim that the can parse the document if its markup contains the string
Z3988.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
- NSError *error;
- NSArray *nodes = [[xmlDocument rootElement]
nodesForXPath:@"./body//span[@class='Z3988']" error:&error];
+ NSArray *nodes = [[domDocument documentElement]
nodesForXPath:hasCOinSNodesXPath];
return [nodes count] > 0;
}
@@ -106,8 +106,8 @@
}
- NSArray * components = [inputString
componentsSeparatedByString:@"&"];
- if ([components count] < 2 ) { return nil; }
+ NSArray * components = [inputString componentsSeparatedByString:@"&"];
+ if ([components count] < 2 ) { return nil; }
NSMutableDictionary *fieldsDict = [NSMutableDictionary dictionary];
NSMutableArray *files = [NSMutableArray array];
@@ -321,15 +321,14 @@
// Process the document.
- (NSArray *)itemsReturningError:(NSError **)outError {
- NSArray *nodes = [[[self xmlDocument] rootElement]
nodesForXPath:@"./body//span[@class='Z3988']" error:outError];
+ NSArray *nodes = [[[self domDocument] documentElement]
nodesForXPath:hasCOinSNodesXPath];
NSMutableArray *items = [NSMutableArray arrayWithCapacity:[nodes count]];
- for (NSXMLNode *node in nodes) {
+ for (DOMNode *node in nodes) {
NSString *title;
BibItem *bibItem;
- if ([node kind] == NSXMLElementKind &&
- (title = [[(NSXMLElement *)node attributeForName:@"title"]
XMLString]) &&
+ if ((title = [(DOMElement *)node getAttribute:@"title"]) &&
(bibItem = [BDSKCOinSParser parseCOinSString:title]))
[items addObject:bibItem];
}
Modified: trunk/bibdesk/BDSKCiteULikeParser.m
===================================================================
--- trunk/bibdesk/BDSKCiteULikeParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKCiteULikeParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,14 +37,13 @@
#import "BDSKCiteULikeParser.h"
#import "BibItem.h"
-#import "NSXMLNode_BDSKExtensions.h"
@implementation BDSKCiteULikeParser
+ (NSString *)citationNodeXPath { return
@"./body//div[@id='export']/table//td/a[text()='BibTeX']"; }
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
- NSMutableString *urlString = [[[node stringValueOfAttribute:@"href"]
mutableCopy] autorelease];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+ NSMutableString *urlString = [[[(DOMElement *)node getAttribute:@"href"]
mutableCopy] autorelease];
[urlString replaceOccurrencesOfString:@"/bibtex_options/"
withString:@"/bibtex/" options:NSCaseInsensitiveSearch range:NSMakeRange(0,
[urlString length])];
Modified: trunk/bibdesk/BDSKDOIWebParser.m
===================================================================
--- trunk/bibdesk/BDSKDOIWebParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKDOIWebParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,7 @@
*/
#import "BDSKDOIWebParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSURL_BDSKExtensions.h"
#import "NSString_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -45,25 +45,29 @@
@implementation BDSKDOIWebParser
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url {
- NSXMLNode *rootElement = [xmlDocument rootElement];
- NSError *error;
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
+ DOMNode *rootElement = [domDocument documentElement];
NSString *doiXPath;
NSArray *nodes;
- doiXPath = @"./head/meta[contains(' citation_doi doi prism.doi
dc.identifier ', concat(' ',lower-case(@name),' ') and
(starts-with(lower-case(@content),'doi:') or starts-with(@content,'10.'))][1]";
- nodes = [rootElement nodesForXPath:doiXPath error:&error];
+ doiXPath =
@"./head/meta[starts-with(translate(@content,'DOI','doi'),'doi:') or
starts-with(@content,'10.')]";
+ nodes = [rootElement nodesForXPath:doiXPath];
- if ([nodes count] > 0)
- return YES;
+ if ([nodes count] > 0) {
+ NSSet *names = [NSSet setWithObjects:@"citation_doi", @"doi",
@"prism.doi", @"dc.identifier", nil];
+ for (DOMNode *node in nodes) {
+ if ([names containsObject:[[node nodeName] lowercaseString]])
+ return YES;
+ }
+ }
doiXPath = @"./body//a[starts-with(@href,'https://doi.org/') or
starts-with(@href,'http://dx.doi.org/')]";
- nodes = [rootElement nodesForXPath:doiXPath error:&error];
+ nodes = [rootElement nodesForXPath:doiXPath];
if ([nodes count] > 0)
return YES;
- NSString *text = [xmlDocument textStringValue];
+ NSString *text = [[domDocument body] textContent];
AGRegex *doiRegex = [AGRegex regexWithPattern:@"(doi:[
\\t]*|https?://(dx\\.)?doi\\.org/)10\\.[0-9]{4,}(\\.[0-9]+)*/\\S+"
options:AGRegexCaseInsensitive];
return nil != [doiRegex findInString:text];
@@ -72,32 +76,39 @@
- (NSArray *)itemsReturningError:(NSError **)outError {
NSMutableArray *dois = [NSMutableArray array];
NSURL *baseURL = [NSURL URLWithString:@"https://doi.org/"];
- NSXMLNode *rootElement = [[self xmlDocument] rootElement];
+ DOMNode *rootElement = [[self domDocument] documentElement];
NSString *doiXPath;
NSArray *nodes = nil;
- NSError *error = nil;
AGRegex *doiRegex = [AGRegex
regexWithPattern:@"^(doi:|https?://(dx\\.)?doi\\.org/)?(10\\.[0-9]{4,}(\\.[0-9]+)*/\\S+)$"
options:AGRegexCaseInsensitive];
AGRegexMatch *match;
NSString *doi;
- doiXPath = @"./head/meta[contains(' citation_doi doi prism.doi
dc.identifier ', concat(' ', lower-case(@name), ' ') and
(starts-with(lower-case(@content),'doi:') or starts-with(@content,'10.'))][1]";
- nodes = [rootElement nodesForXPath:doiXPath error:&error];
+ doiXPath =
@"./head/meta[starts-with(translate(@content,'DOI','doi'),'doi:') or
starts-with(@content,'10.')]";
+ nodes = [rootElement nodesForXPath:doiXPath];
if ([nodes count] > 0) {
- doi = [[nodes firstObject] stringValueOfAttribute:@"content"];
- if (doi && (match = [doiRegex findInString:doi])) {
- doi = [match groupAtIndex:3];
- if ([[match groupAtIndex:1] hasPrefix:@"http"] == NO)
- doi = [doi stringByAddingPercentEscapes];
- [dois addObject:doi];
+ NSSet *names = [NSSet setWithObjects:@"citation_doi", @"doi",
@"prism.doi", @"dc.identifier", nil];
+ for (DOMElement *node in nodes) {
+ if ([names containsObject:[[node nodeName] lowercaseString]]) {
+ doi = [node getAttribute:@"content"];
+ if (doi && (match = [doiRegex findInString:doi])) {
+ doi = [match groupAtIndex:3];
+ if ([[match groupAtIndex:1] hasPrefix:@"http"] == NO)
+ doi = [doi stringByAddingPercentEscapes];
+ [dois addObject:doi];
+ }
+ break;
+ }
}
- } else {
+ }
+
+ if ([dois count] == 0) {
doiXPath = @"./body//a[starts-with(@href,'https://doi.org/') or
starts-with(@href,'http://dx.doi.org/')]";
- nodes = [rootElement nodesForXPath:doiXPath error:&error];
+ nodes = [rootElement nodesForXPath:doiXPath];
- for (NSXMLNode *node in nodes) {
- doi = [node stringValueOfAttribute:@"href"];
+ for (DOMElement *node in nodes) {
+ doi = [node getAttribute:@"href"];
if (doi && (match = [doiRegex findInString:doi])) {
doi = [[match groupAtIndex:3]
stringByTrimmingCharactersInSet:[NSCharacterSet punctuationCharacterSet]];
if ([dois containsObject:doi] == NO)
@@ -107,7 +118,7 @@
}
if ([dois count] == 0) {
- NSString *text = [[self xmlDocument] textStringValue];
+ NSString *text = [[[self domDocument] body] textContent];
doiRegex = [AGRegex regexWithPattern:@"(doi:[
\\t]*|https?://(dx\\.)?doi\\.org/)(10\\.[0-9]{4,}(\\.[0-9]+)*/\\S+)"
options:AGRegexCaseInsensitive];
@@ -128,9 +139,6 @@
[self addDownloadWithRequest:request contextInfo:nil];
}
- if ([dois count] == 0 && outError)
- *outError = error;
-
return nil;
}
Modified: trunk/bibdesk/BDSKGoogleScholarParser.m
===================================================================
--- trunk/bibdesk/BDSKGoogleScholarParser.m 2018-08-22 06:30:25 UTC (rev
22508)
+++ trunk/bibdesk/BDSKGoogleScholarParser.m 2018-08-22 14:19:07 UTC (rev
22509)
@@ -36,6 +36,7 @@
*/
#import "BDSKGoogleScholarParser.h"
+#import "DOMNode_BDSKExtensions.h"
#define BDSKDisableGoogleScholarListParsingKey
@"BDSKDisableGoogleScholarListParsing"
@@ -43,7 +44,7 @@
+ (NSString *)citationNodeXPath { return
@"./body//a[contains(text(),'BibTeX')]"; }
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
// !!! other countries end up with e.g. scholar.google.be; checking for
scholar.google.com may fail in those cases
// also some sites access google scholar via an ezproxy, so the suffix
could be quite complex
if (nil == [url host] || NO == [[[url host] lowercaseString]
hasPrefix:@"scholar.google."] ||
@@ -51,8 +52,7 @@
return NO;
}
- NSError *error = nil;
- NSUInteger nodecount = [[[xmlDocument rootElement] nodesForXPath:[self
citationNodeXPath] error:&error] count];
+ NSUInteger nodecount = [[[domDocument documentElement] nodesForXPath:[self
citationNodeXPath]] count];
return nodecount > 0;
}
Modified: trunk/bibdesk/BDSKHCiteParser.m
===================================================================
--- trunk/bibdesk/BDSKHCiteParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKHCiteParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,23 +39,24 @@
#import <WebKit/WebKit.h>
#import "BibItem.h"
#import "BDSKTypeManager.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
@interface BDSKHCiteParser (Private)
-- (NSDate *)dateFromNode:(NSXMLNode *)node;
-- (NSString *)BTAuthorStringFromVCardNode:(NSXMLNode *)node;
-- (NSMutableDictionary *)dictionaryFromCitationNode:(NSXMLNode *)citationNode;
+- (NSDate *)dateFromNode:(DOMNode *)node;
+- (NSString *)BTAuthorStringFromVCardNode:(DOMNode *)node;
+- (NSMutableDictionary *)dictionaryFromCitationNode:(DOMNode *)citationNode
isContainer:(BOOL)isContainer;
@end
-
@implementation BDSKHCiteParser
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
- NSError *error = nil;
- return [[[xmlDocument rootElement]
descendantOrSelfNodesWithClassName:@"hcite" error:&error] count] > 0;
+ NSString *hciteXpath = @".//*[contains(concat(' ',normalize-space(@class),
' '),' hcite ') and not(contains(concat(' ', normalize-space(@class), ' '),'
container '))]";
+ NSArray *mainNodes = [[domDocument documentElement]
nodesForXPath:hciteXpath];
+
+ return [mainNodes count] > 0;
}
- (NSArray *)itemsReturningError:(NSError **)outError {
@@ -62,17 +63,16 @@
NSMutableArray *items = [NSMutableArray arrayWithCapacity:0];
- NSArray *mainNodes = [[[self xmlDocument] rootElement]
descendantOrSelfNodesWithClassName:@"hcite" error:outError];
+ // get hcite elements, avoid creating top-level refs from containers:
+ NSString *hciteXpath = @".//*[contains(concat(' ',normalize-space(@class),
' '),' hcite ') and not(contains(concat(' ', normalize-space(@class), ' '),'
container '))]";
+ NSArray *mainNodes = [[[self domDocument] documentElement]
nodesForXPath:hciteXpath];
- for (NSXMLNode *obj in mainNodes) {
+ for (DOMNode *obj in mainNodes) {
NSMutableDictionary *rd = nil;
NSString *type = nil;
- // avoid creating top-level refs from containers:
- if([[obj classNames] containsObject:@"container"]) continue;
-
- rd = [self dictionaryFromCitationNode:obj];
+ rd = [self dictionaryFromCitationNode:obj isContainer:NO];
type = [rd objectForKey:BDSKTypeString];
[rd removeObjectForKey:BDSKTypeString];
@@ -84,22 +84,17 @@
return items;
}
-- (NSMutableDictionary *)dictionaryFromCitationNode:(NSXMLNode *)citationNode{
+- (NSMutableDictionary *)dictionaryFromCitationNode:(DOMNode *)citationNode
isContainer:(BOOL)isContainer{
BDSKTypeManager *typeMan = [BDSKTypeManager sharedManager];
NSMutableDictionary *rd = [NSMutableDictionary dictionaryWithCapacity:0];
- NSError *err = nil;
- NSUInteger i = 0;
-
// find type but not type that's a descendant of 'container'.
- NSArray *typeNodes = [citationNode
descendantOrSelfNodesWithClassName:@"type" error:&err];
+ NSArray *typeNodes = [citationNode
descendantOrSelfNodesWithClassName:@"type"];
NSString *typeString = nil;
- for (i = 0; i < [typeNodes count]; i++) {
- NSXMLNode *node = [typeNodes objectAtIndex:i];
- if(![[citationNode classNames] containsObject:@"container"] &&
- [node hasParentWithClassName:@"container"] ) continue;
- typeString = [node fullStringValueIfABBR];
+ for (DOMNode *node in typeNodes) {
+ if(isContainer || [node hasParentWithClassName:@"container"] == NO)
+ typeString = [node fullStringValueIfABBR];
}
if(typeString != nil){
@@ -111,187 +106,151 @@
// find title node
- NSArray *titleNodes = [citationNode
descendantOrSelfNodesWithClassName:@"title" error:&err];
+ NSArray *titleNodes = [citationNode
descendantOrSelfNodesWithClassName:@"title"];
- for(i = 0; i < [titleNodes count]; i++){
- NSXMLNode *node = [titleNodes objectAtIndex:i];
- if(![[citationNode classNames] containsObject:@"container"] &&
- [node hasParentWithClassName:@"container"]){
- // note: todo - avoid second hasParentWithClassName by finding
container
- // nodes first and caching those then checking against them here.
(if necessary)
- continue; // deal with this citation's container later
- }
-
- [rd setObject:[node stringValue] forKey:BDSKTitleString];
+ for(DOMNode *node in titleNodes){
+ // deal with this citation's container later
+ // note: todo - avoid second hasParentWithClassName by finding
container
+ // nodes first and caching those then checking against them here. (if
necessary)
+ if(isContainer || [node hasParentWithClassName:@"container"] == NO)
+ [rd setObject:[node stringValue] forKey:BDSKTitleString];
}
// find authors
-
- NSArray *authorNodes = [citationNode
descendantOrSelfNodesWithClassName:@"creator" error:&err];
+
+ NSString *authorXpath = @".//*[contains(concat('
',normalize-space(@class), ' '),' creator ') and contains(concat('
',normalize-space(@class), ' '),' vcard ')]";
+ NSArray *authorNodes = [citationNode nodesForXPath:authorXpath];
NSMutableString *BTAuthString = [NSMutableString stringWithCapacity:0];
- for(i = 0; i < [authorNodes count]; i++){
- NSXMLNode *node = [authorNodes objectAtIndex:i];
- if (! [[node classNames] containsObject:@"vcard"]) continue;
-
- if(i > 0)[BTAuthString appendFormat:@" and "];
-
+ for(DOMNode *node in authorNodes){
+ if ([BTAuthString length]) [BTAuthString appendFormat:@" and "];
[BTAuthString appendString:[self BTAuthorStringFromVCardNode:node]];
-
}
[rd setObject:BTAuthString forKey:BDSKAuthorString];
// find keywords
- NSArray *tagNodes = [citationNode nodesForXPath:@".//*[contains(concat('
', normalize-space(@rel), ' '), ' tag ')]" error:&err];
- NSMutableString *BTKeywordString = [NSMutableString stringWithCapacity:0];
-
- for(i = 0; i < [tagNodes count]; i++){
- NSXMLNode *node = [tagNodes objectAtIndex:i];
-
- if(i > 0)[BTKeywordString appendFormat:@"; "];
-
- [BTKeywordString appendString:[node stringValue]];
-
- }
- [rd setObject:BTKeywordString forKey:BDSKKeywordsString];
-
- // find description (append multiple descriptions to avoid data loss)
-
- NSMutableArray *descNodes = [NSMutableArray arrayWithCapacity:0];
- [descNodes addObjectsFromArray:[citationNode
descendantOrSelfNodesWithClassName:@"description" error:&err]];
- [descNodes addObjectsFromArray:[citationNode
descendantOrSelfNodesWithClassName:@"abstract" error:&err]];
-
- NSMutableString *BTDescString = [NSMutableString stringWithCapacity:0];
-
- for(i = 0; i < [descNodes count]; i++){
- NSXMLNode *node = [descNodes objectAtIndex:i];
-
- if(i > 0)[BTDescString appendFormat:@"\n"];
-
- [BTDescString appendString:[node stringValue]];
-
- }
- [rd setObject:BTDescString forKey:BDSKAbstractString];
-
-
- // find date published
-
- NSArray *datePublishedNodes = [citationNode
descendantOrSelfNodesWithClassName:@"date-published" error:&err];
-
- if([datePublishedNodes count] > 0) {
- NSXMLNode *datePublishedNode = [datePublishedNodes objectAtIndex:0];
// Only use the first such node.
- NSDate *datePublished = [self dateFromNode:datePublishedNode];
- NSDateFormatter *formatter = [[NSDateFormatter alloc] init];
- [formatter setFormatterBehavior:NSDateFormatterBehavior10_4];
- [formatter setDateFormat:@"yyyy"];
- [rd setObject:[formatter stringFromDate:datePublished]
forKey:BDSKYearString];
- [formatter setDateFormat:@"MMMM"];
- [rd setObject:[formatter stringFromDate:datePublished]
forKey:BDSKMonthString];
- [formatter release];
- }
-
- // find issue
-
- NSArray *issueNodes = [citationNode
descendantOrSelfNodesWithClassName:@"issue" error:&err];
-
- if([issueNodes count] > 0) {
- NSXMLNode *issueNode = [issueNodes objectAtIndex:0]; // Only use the
first such node.
+ NSArray *tagNodes = [citationNode nodesForXPath:@".//*[contains(concat('
', normalize-space(@rel), ' '), ' tag ')]"];
+
+ [rd setObject:[[tagNodes valueForKey:@"stringValue"]
componentsJoinedByString:@"; "] forKey:BDSKKeywordsString];
+
+ // find description (append multiple descriptions to avoid data loss)
+
+ NSMutableArray *descNodes = [NSMutableArray arrayWithCapacity:0];
+ [descNodes addObjectsFromArray:[citationNode
descendantOrSelfNodesWithClassName:@"description"]];
+ [descNodes addObjectsFromArray:[citationNode
descendantOrSelfNodesWithClassName:@"abstract"]];
+
+ [rd setObject:[[descNodes valueForKey:@"stringValue"]
componentsJoinedByString:@"\n"] forKey:BDSKAbstractString];
+
+ // find date published
+
+ NSArray *datePublishedNodes = [citationNode
descendantOrSelfNodesWithClassName:@"date-published"];
+
+ if([datePublishedNodes count] > 0) {
+ DOMNode *datePublishedNode = [datePublishedNodes objectAtIndex:0]; //
Only use the first such node.
+ NSDate *datePublished = [self dateFromNode:datePublishedNode];
+ NSDateFormatter *formatter = [[NSDateFormatter alloc] init];
+ [formatter setFormatterBehavior:NSDateFormatterBehavior10_4];
+ [formatter setDateFormat:@"yyyy"];
+ [rd setObject:[formatter stringFromDate:datePublished]
forKey:BDSKYearString];
+ [formatter setDateFormat:@"MMMM"];
+ [rd setObject:[formatter stringFromDate:datePublished]
forKey:BDSKMonthString];
+ [formatter release];
+ }
+
+ // find issue
+
+ NSArray *issueNodes = [citationNode
descendantOrSelfNodesWithClassName:@"issue"];
+
+ if([issueNodes count] > 0) {
+ // Only use the first such node.
+ [rd setObject:[[issueNodes objectAtIndex:0] stringValue]
forKey:@"Issue"];
+ }
+
+ // find pages
+
+ NSArray *pagesNodes = [citationNode
descendantOrSelfNodesWithClassName:@"pages"];
+
+ if([pagesNodes count] > 0) {
+ // Only use the first such node.
+ [rd setObject:[[pagesNodes objectAtIndex:0] stringValue]
forKey:BDSKPagesString];
+ }
+
+ // find URI
+
+ NSArray *URINodes = [citationNode
descendantOrSelfNodesWithClassName:@"uri"];
+
+ if([URINodes count] > 0) {
+ DOMElement *URINode = [URINodes objectAtIndex:0]; // Only use the
first such node.
+ NSString *URIString = nil;
+
+ if([[URINode nodeName] isCaseInsensitiveEqual:@"a"]){
+ URIString = [URINode getAttribute:@"href"];
+ }else{
+ URIString = [URINode fullStringValueIfABBR];
+ }
+
+ if([URIString hasCaseInsensitivePrefix:@"http://"] || [URIString
hasCaseInsensitivePrefix:@"https://"]){
+ [rd setObject:URIString forKey:BDSKUrlString];
+ } else {
+ [rd setObject:URIString forKey:@"Uri"];
+
+ }
+ }
+
+ // get container info:
+ // *** NOTE: should do this last, to avoid overwriting data
+
+ NSString *containerXpath = @".//*[contains(concat('
',normalize-space(@class), ' '),' hcite ') and contains(concat(' ',
normalize-space(@class), ' '),' container ')]";
+ NSArray *containerNodes = [citationNode nodesForXPath:containerXpath];
+
+ if([containerNodes count] > 0) {
+ DOMNode *containerNode = [containerNodes objectAtIndex:0];
- [rd setObject:[issueNode stringValue] forKey:@"Issue"];
- }
-
- // find pages
-
- NSArray *pagesNodes = [citationNode
descendantOrSelfNodesWithClassName:@"pages" error:&err];
-
- if([pagesNodes count] > 0) {
- NSXMLNode *pagesNode = [pagesNodes objectAtIndex:0]; // Only use the
first such node.
-
- [rd setObject:[pagesNode stringValue] forKey:BDSKPagesString];
- }
-
- // find URI
-
- NSArray *URINodes = [citationNode
descendantOrSelfNodesWithClassName:@"uri" error:&err];
-
- if([URINodes count] > 0) {
- NSXMLNode *URINode = [URINodes objectAtIndex:0]; // Only use the
first such node.
- NSString *URIString = nil;
-
- if([[URINode name] isEqualToString:@"a"]){
- URIString = [URINode stringValueOfAttribute:@"href"];
- }else{
- URIString = [URINode fullStringValueIfABBR];
- }
-
- if([URIString hasCaseInsensitivePrefix:@"http://"] || [URIString
hasCaseInsensitivePrefix:@"https://"]){
- [rd setObject:URIString forKey:BDSKUrlString];
- } else {
- [rd setObject:URIString forKey:@"Uri"];
+ NSString *citationType = [rd objectForKey:BDSKTypeString];
- }
- }
-
- // get container info:
- // *** NOTE: should do this last, to avoid overwriting data
-
- NSArray *containerNodes = [citationNode
descendantOrSelfNodesWithClassName:@"container"
-
error:&err];
-
- if([containerNodes count] > 0) {
- NSXMLNode *containerNode = [containerNodes objectAtIndex:0];
-
- if([[containerNode classNames] containsObject:@"hcite"]){
- NSString *citationType = [rd objectForKey:BDSKTypeString];
-
- NSMutableDictionary *containerDict = [NSMutableDictionary
dictionaryWithDictionary:[self dictionaryFromCitationNode:containerNode]];
- NSString *containerTitle = [containerDict
objectForKey:BDSKTitleString];
- NSString *containerType = [containerDict
objectForKey:BDSKTypeString];
-
- if(containerType != nil && containerTitle != nil){
- // refine type based on container type
- if([citationType isEqualToString:@"misc"]){
- if([containerType isEqualToString:@"journal"]){
- [rd setObject:BDSKArticleString
forKey:BDSKTypeString];
- }else if([containerType isEqualToString:@"proceedings"]){
- [rd setObject:BDSKInproceedingsString
forKey:BDSKTypeString];
- }
+ NSMutableDictionary *containerDict = [NSMutableDictionary
dictionaryWithDictionary:[self dictionaryFromCitationNode:containerNode
isContainer:YES]];
+ NSString *containerTitle = [containerDict
objectForKey:BDSKTitleString];
+ NSString *containerType = [containerDict objectForKey:BDSKTypeString];
+
+ if(containerType != nil && containerTitle != nil){
+ // refine type based on container type
+ if([citationType isEqualToString:@"misc"]){
+ if([containerType isEqualToString:@"journal"]){
+ [rd setObject:BDSKArticleString forKey:BDSKTypeString];
+ }else if([containerType isEqualToString:@"proceedings"]){
+ [rd setObject:BDSKInproceedingsString
forKey:BDSKTypeString];
+ }
+ }
- }
-
- // refresh:
- citationType = [rd objectForKey:BDSKTypeString];
-
- if([citationType isEqualToString:BDSKArticleString]){
- [rd setObject:containerTitle forKey:BDSKJournalString];
- }else if([citationType
isEqualToString:BDSKIncollectionString] ||
- [citationType
isEqualToString:BDSKInproceedingsString]){
- [rd setObject:containerTitle forKey:BDSKBooktitleString];
- }else if([citationType isEqualToString:BDSKInbookString]){
- // TODO: this case may need some tweaking
- [rd setObject:[rd objectForKey:BDSKTitleString]
forKey:BDSKChapterString];
- [rd setObject:containerTitle forKey:BDSKTitleString];
- }else{
- [rd setObject:containerTitle forKey:BDSKBooktitleString];
- }
- }
- // Containers have more info than just title and type:
- // TODO: do we only dump it in or do we need to do more?
- [containerDict removeObjectsForKeys:[rd allKeys]];
- [rd addEntriesFromDictionary:containerDict];
- }
-
+ // refresh:
+ citationType = [rd objectForKey:BDSKTypeString];
+
+ if([citationType isEqualToString:BDSKArticleString]){
+ [rd setObject:containerTitle forKey:BDSKJournalString];
+ }else if([citationType isEqualToString:BDSKIncollectionString] ||
[citationType isEqualToString:BDSKInproceedingsString]){
+ [rd setObject:containerTitle forKey:BDSKBooktitleString];
+ }else if([citationType isEqualToString:BDSKInbookString]){
+ // TODO: this case may need some tweaking
+ [rd setObject:[rd objectForKey:BDSKTitleString]
forKey:BDSKChapterString];
+ [rd setObject:containerTitle forKey:BDSKTitleString];
+ }else{
+ [rd setObject:containerTitle forKey:BDSKBooktitleString];
+ }
+ }
+ // Containers have more info than just title and type:
+ // TODO: do we only dump it in or do we need to do more?
+ [containerDict removeObjectsForKeys:[rd allKeys]];
+ [rd addEntriesFromDictionary:containerDict];
+
}
return rd;
}
-- (NSString *)BTAuthorStringFromVCardNode:(NSXMLNode *)node{
- NSError *err;
-
+- (NSString *)BTAuthorStringFromVCardNode:(DOMNode *)node{
// note: may eventually need to do more than just look at fn and abbr.
- NSArray *fnNodes = [node descendantOrSelfNodesWithClassName:@"fn"
error:&err];
+ NSArray *fnNodes = [node descendantOrSelfNodesWithClassName:@"fn"];
if([fnNodes count] < 1) return @"";
@@ -298,7 +257,7 @@
return [[fnNodes objectAtIndex:0] fullStringValueIfABBR];
}
-- (NSDate *)dateFromNode:(NSXMLNode *)node{
+- (NSDate *)dateFromNode:(DOMNode *)node{
NSString *fullString = [node fullStringValueIfABBR];
NSDate *d;
Modified: trunk/bibdesk/BDSKHubmedParser.m
===================================================================
--- trunk/bibdesk/BDSKHubmedParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKHubmedParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,12 +39,11 @@
#import <WebKit/WebKit.h>
#import "BibItem.h"
#import <AGRegex/AGRegex.h>
-#import "NSXMLNode_BDSKExtensions.h"
#import "NSString_BDSKExtensions.h"
@implementation BDSKHubmedParser
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
if ([url host] == nil || [[url host]
isCaseInsensitiveEqual:@"www.hubmed.org"] == NO ||
[[url path] isCaseInsensitiveEqual:@"/display.cgi"] == NO){
Modified: trunk/bibdesk/BDSKIACRParser.m
===================================================================
--- trunk/bibdesk/BDSKIACRParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKIACRParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -38,7 +38,7 @@
#import "BDSKIACRParser.h"
#import "BibItem.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSURL_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -45,7 +45,7 @@
@implementation BDSKIACRParser
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
if ([url hasDomain:@"eprint.iacr.org"] == NO)
return NO;
@@ -75,30 +75,42 @@
// construct the source item(s) to parse
NSArray *sources = nil;
- NSXMLElement *rootElement = [[self xmlDocument] rootElement];
+ DOMElement *rootElement = [[self domDocument] documentElement];
if (isSearch)
- sources = [rootElement nodesForXPath:@"./body//dt" error:outError];
+ sources = [rootElement nodesForXPath:@"./body//dt"];
else
sources = [NSArray arrayWithObjects:rootElement, nil];
- for (NSXMLNode *xmlNode in sources) {
+ for (DOMNode *node in sources) {
NSMutableDictionary *pubFields = [NSMutableDictionary
dictionary];
NSArray *filesArray = nil;
- NSString *pathToSearch;
+ NSString *pathToSearch = nil;
+ NSArray *nodes;
+ NSString *string;
if (isSearch) {
// set title
- [xmlNode searchXPath:@"following-sibling::dd/b" addTo:pubFields
forKey:BDSKTitleString];
+ nodes = [node nodesForXPath:@"following-sibling::dd/b"];
+ if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+ [pubFields setObject:string forKey:BDSKTitleString];
+ nodes = [node
nodesForXPath:@"following-sibling::dd[position()=2]/em"];
// set authors
- [xmlNode searchXPath:@"following-sibling::dd[position()=2]/em"
addTo:pubFields forKey:BDSKAuthorString];
+ if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+ [pubFields setObject:string forKey:BDSKAuthorString];
+ nodes = [node nodesForXPath:@".//a/@href"];
// to get year and report number
- pathToSearch = [xmlNode searchXPath:@".//a/@href" addTo:nil
forKey:nil];
+ if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+ pathToSearch = string;
} else {
// set title
- [xmlNode searchXPath:@".//b" addTo:pubFields
forKey:BDSKTitleString];
+ nodes = [node nodesForXPath:@".//b"];
+ if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+ [pubFields setObject:string forKey:BDSKTitleString];
// set authors
- [xmlNode searchXPath:@".//i" addTo:pubFields
forKey:BDSKAuthorString];
+ nodes = [node nodesForXPath:@".//i"];
+ if ([nodes count] && (string = [[nodes firstObject] stringValue]))
+ [pubFields setObject:string forKey:BDSKAuthorString];
// to get year and report number
pathToSearch = [url path];
}
Modified: trunk/bibdesk/BDSKIEEEXploreParser.m
===================================================================
--- trunk/bibdesk/BDSKIEEEXploreParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKIEEEXploreParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -38,7 +38,7 @@
#import "BDSKIEEEXploreParser.h"
#import <WebKit/WebKit.h>
#import "BibItem.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSError_BDSKExtensions.h"
#import "NSArray_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -45,13 +45,13 @@
#import "NSString_BDSKExtensions.h"
// sometimes the link says AbstractPlus, sometimes it only says Abstract. This should catch both:
-static NSString *containsAbstractPlusLinkNode =
@"./body//a[contains(lower-case(text()),'abstract')]";
+static NSString *containsAbstractPlusLinkNode =
@"./body//a[contains(translate(text(),'ABSTRACT','abstract'),'abstract')]";
static NSString *abstractPageURLPath = @"/xpls/abs_all.jsp";
static NSString *searchResultPageURLPath = @"/search/srchabstract.jsp";
@implementation BDSKIEEEXploreParser
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
if (nil == [url host] || [[url host]
isCaseInsensitiveEqual:@"ieeexplore.ieee.org"] == NO)
return NO;
@@ -59,7 +59,7 @@
if ([[url path] isCaseInsensitiveEqual:abstractPageURLPath] || [[url
path] isCaseInsensitiveEqual:searchResultPageURLPath])
return YES;
- return [[[xmlDocument rootElement]
nodesForXPath:containsAbstractPlusLinkNode error:NULL] count] > 0;
+ return [[[domDocument documentElement]
nodesForXPath:containsAbstractPlusLinkNode] count] > 0;
}
- (NSArray *)itemsReturningError:(NSError **)outError {
@@ -86,10 +86,10 @@
[abstractPageURLs addObject:url];
} else {
// parse all links on a TOC page
- NSArray *abstractPlusLinkNodes = [[[self xmlDocument] rootElement]
nodesForXPath:containsAbstractPlusLinkNode error:outError];
+ NSArray *abstractPlusLinkNodes = [[[self domDocument] documentElement]
nodesForXPath:containsAbstractPlusLinkNode];
- for (NSXMLNode *aplinknode in abstractPlusLinkNodes) {
- NSString *hrefValue = [aplinknode stringValueOfAttribute:@"href"];
+ for (DOMElement *aplinknode in abstractPlusLinkNodes) {
+ NSString *hrefValue = [aplinknode getAttribute:@"href"];
[abstractPageURLs addObject:[NSURL URLWithString:hrefValue
relativeToURL:url]];
}
}
Modified: trunk/bibdesk/BDSKIUCrParser.m
===================================================================
--- trunk/bibdesk/BDSKIUCrParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKIUCrParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,6 @@
*/
#import "BDSKIUCrParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
@implementation BDSKIUCrParser
@@ -44,8 +43,8 @@
+ (NSString *)citationNodeXPath { return
@"./body//table[@class='citation']//td/input[@name='cnor' and
string-length(@value)!=0]"; }
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
- NSString *cnorValue = [node stringValueOfAttribute:@"value"];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+ NSString *cnorValue = [(DOMElement *)node getAttribute:@"value"];
return
[@"//scripts.iucr.org/cgi-bin/biblio?Action=download&saveas=BIBTeX&cnor="
stringByAppendingString:cnorValue];
}
Modified: trunk/bibdesk/BDSKInspireParser.m
===================================================================
--- trunk/bibdesk/BDSKInspireParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKInspireParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,7 @@
#import "BDSKInspireParser.h"
#import "BDSKBibTeXParser.h"
#import "NSError_BDSKExtensions.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSURL_BDSKExtensions.h"
@@ -60,7 +60,7 @@
return bibtexString;
}
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
if ([url host] == nil || [[[url host] lowercaseString]
isEqualToString:@"inspirehep.net"] == NO)
return NO;
@@ -67,8 +67,7 @@
if ([url hasFirstPathComponent:@"record"])
return YES;
- NSError *error = nil;
- NSUInteger nodeCount = [[[xmlDocument rootElement] nodesForXPath:[self
citationNodeXPath] error:&error] count];
+ NSUInteger nodeCount = [[[domDocument documentElement] nodesForXPath:[self
citationNodeXPath]] count];
return nodeCount > 0;
}
@@ -79,8 +78,8 @@
NSMutableArray *items = [NSMutableArray array];
NSString *bibtexString = nil;
- NSArray *preNodes = [[[self xmlDocument] rootElement]
nodesForXPath:@"./body/div/div/pre[contains(text(),'@')]" error:outError];
- bibtexString = [[[[preNodes firstObject] stringValue] retain]
autorelease];
+ NSArray *preNodes = [[[self domDocument] documentElement]
nodesForXPath:@"./body/div/div/pre[contains(text(),'@')]"];
+ bibtexString = [[[[preNodes firstObject] textContent] retain]
autorelease];
NSArray *bibtexItems = nil;
if (bibtexString)
Modified: trunk/bibdesk/BDSKJSTORWebParser.m
===================================================================
--- trunk/bibdesk/BDSKJSTORWebParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKJSTORWebParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,6 @@
*/
#import "BDSKJSTORWebParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
#import "NSURL_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -44,7 +43,7 @@
@implementation BDSKJSTORWebParser
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
if ([url hasDomain:@"jstor.org"] == NO)
return NO;
Modified: trunk/bibdesk/BDSKMASParser.m
===================================================================
--- trunk/bibdesk/BDSKMASParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKMASParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -39,7 +39,6 @@
#import "BDSKMASParser.h"
#import "BibItem.h"
#import "NSString_BDSKExtensions.h"
-#import "NSXMLNode_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -47,8 +46,8 @@
+ (NSString *)citationNodeXPath { return
@"./body//a[starts-with(@href,'../../UserInput/EditPublication?id=') or
starts-with(@href,'Publication/') or starts-with(@href,'/Publication/')]"; }
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
- NSString *href = [node stringValueOfAttribute:@"href"];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+ NSString *href = [(DOMElement *)node getAttribute:@"href"];
NSString *pattern =
@"^\\.\\./\\.\\./UserInput/EditPublication\\?id\\=([0-9]+)$";
if ([href hasPrefix:@"Publication/"])
Modified: trunk/bibdesk/BDSKMathSciNetParser.m
===================================================================
--- trunk/bibdesk/BDSKMathSciNetParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKMathSciNetParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -50,7 +50,7 @@
// MathSciNet is mirrored across different servers, don't use the server name to recognise the URL.
// Instead recognise all URLs beginning with 'mathscinet', to match both general MatSciNet URLs like <https://www.ams.org/mathscinet/...> and MathSciNet reference URLS <https://www.ams.org/mathscinet-getitem?...>.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
if ([url hasFirstPathComponent:@"mathscinet"] == NO)
return NO;
@@ -57,7 +57,7 @@
AGRegex * MRRegexp = [AGRegex regexWithPattern:@"MR0*([0-9]+)"
options:AGRegexMultiline];
- return nil != [MRRegexp findInString:[xmlDocument XMLString]];
+ return nil != [MRRegexp findInString:[[domDocument documentElement]
innerHTML]];
}
// Finds strings of type MR1234567 in the current page.
@@ -66,7 +66,7 @@
- (NSArray *)itemsReturningError:(NSError **)outError {
AGRegex * MRRegexp = [AGRegex regexWithPattern:@"MR0*([0-9]+)"
options:AGRegexMultiline];
- NSArray * regexpResults = [MRRegexp findAllInString:[[self xmlDocument]
XMLString]];
+ NSArray * regexpResults = [MRRegexp findAllInString:[[[self
domDocument] documentElement] innerHTML]];
NSArray * requests = nil;
Modified: trunk/bibdesk/BDSKNumdamParser.m
===================================================================
--- trunk/bibdesk/BDSKNumdamParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKNumdamParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -40,6 +40,7 @@
#import "BDSKMathSciNetParser.h"
#import "BDSKZentralblattParser.h"
#import "BibItem.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSURL_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -47,13 +48,12 @@
@implementation BDSKNumdamParser
// Recognise Numdam pages by their server name ending in numdam.org.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
if ([url hasDomain:@"numdam.org"] == NO)
return NO;
- NSError *error;
- NSArray * tableCells = [[xmlDocument rootElement]
nodesForXPath:@".//td[@id='contenu']" error:&error];
+ NSArray * tableCells = [[domDocument documentElement]
nodesForXPath:@".//td[@id='contenu']"];
return [tableCells count] > 0;
}
@@ -62,9 +62,9 @@
// (Support for MatSciNet is currently commented out as their lookup script requires online-style MR1234567 identifiers and NUMDAM uses paper-style identifiers a la 16,957b.)
- (NSArray *)itemsReturningError:(NSError **)outError {
- NSArray * tableCells = [[[self xmlDocument] rootElement]
nodesForXPath:@".//td[@id='contenu']" error:outError];
- NSXMLElement * tableCell = [tableCells objectAtIndex:0];
- NSString * content = [tableCell stringValue];
+ NSArray * tableCells = [[[self domDocument] documentElement]
nodesForXPath:@".//td[@id='contenu']"];
+ DOMNode * tableCell = [tableCells objectAtIndex:0];
+ NSString * content = [tableCell textContent];
NSArray * rawReferences = [content componentsSeparatedByString:@"\n"];
Modified: trunk/bibdesk/BDSKProjectEuclidParser.m
===================================================================
--- trunk/bibdesk/BDSKProjectEuclidParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKProjectEuclidParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -40,6 +40,7 @@
#import "BDSKMathSciNetParser.h"
#import "BDSKZentralblattParser.h"
#import "BibItem.h"
+#import "DOMNode_BDSKExtensions.h"
#import "NSURL_BDSKExtensions.h"
#import <AGRegex/AGRegex.h>
@@ -47,13 +48,12 @@
@implementation BDSKProjectEuclidParser
// Recognise Project Euclid pages by their server name ending in projecteuclid.org.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
if ([url hasDomain:@"projecteuclid.org"] == NO)
return NO;
- NSError *error;
- NSArray * identifiers = [[xmlDocument rootElement]
nodesForXPath:@".//div[@id='identifier']/p" error:&error];
+ NSArray * identifiers = [[domDocument documentElement]
nodesForXPath:@".//div[@id='identifier']/p"];
return [identifiers count] > 0;
}
@@ -61,7 +61,7 @@
// Find references for Mathematical Reviews and Zentralblatt Math in the page. Then look them up, giving preference to MSN if both are available.
- (NSArray *)itemsReturningError:(NSError **)outError {
- NSArray * identifiers = [[[self xmlDocument] rootElement]
nodesForXPath:@".//div[@id='identifier']/p" error:outError];
+ NSArray * identifiers = [[[self domDocument] documentElement]
nodesForXPath:@".//div[@id='identifier']/p"];
NSArray *MRRequests = nil;
NSArray *ZMathRequests = nil;
@@ -68,7 +68,7 @@
if ( [identifiers count] ) {
- NSXMLElement * identifier = [identifiers objectAtIndex:0];
+ DOMNode * identifier = [identifiers objectAtIndex:0];
NSString * identifierString = [identifier stringValue];
AGRegex * MRRegexp = [AGRegex regexWithPattern:@"MR([1-9][0-9]*)"
options:0];
@@ -87,11 +87,11 @@
}
// Set up arrays for the lists of MathSciNet and Zentralblatt IDs. These will have the ID for the current element at position 0 and contain NSNull when no ID is found for the respective service.
- NSArray * references = [[[self xmlDocument] rootElement]
nodesForXPath:@".//div[@id='references']/div[@class='ref-block']"
error:outError];
+ NSArray * references = [[[self domDocument] documentElement]
nodesForXPath:@".//div[@id='references']/div[@class='ref-block']"];
NSMutableArray * MRIDs = [NSMutableArray arrayWithObjects:myMRID, nil];
NSMutableArray * ZMathIDs = [NSMutableArray
arrayWithObjects:myZMathID, nil];
- for (NSXMLElement * reference in references) {
+ for (DOMElement * reference in references) {
NSString * referenceString = [reference stringValue];
match = [MRRegexp findInString:referenceString];
Modified: trunk/bibdesk/BDSKSIAMParser.m
===================================================================
--- trunk/bibdesk/BDSKSIAMParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKSIAMParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,7 +37,6 @@
*/
#import "BDSKSIAMParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
@implementation BDSKSIAMParser
@@ -44,8 +43,8 @@
+ (NSString *)citationNodeXPath { return @"./body//a[text()='Download
Citations']"; }
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
- NSMutableString *hrefValue = [[[node stringValueOfAttribute:@"href"]
mutableCopy] autorelease];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+ NSMutableString *hrefValue = [[[(DOMElement *)node getAttribute:@"href"]
mutableCopy] autorelease];
NSRange range = [hrefValue rangeOfString:@"showCitFormats"
options:NSCaseInsensitiveSearch];
if (range.location != NSNotFound)
[hrefValue replaceCharactersInRange:range
withString:@"downloadCitation"];
Modified: trunk/bibdesk/BDSKScienceDirectParser.m
===================================================================
--- trunk/bibdesk/BDSKScienceDirectParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKScienceDirectParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -37,14 +37,13 @@
*/
#import "BDSKScienceDirectParser.h"
-#import "NSXMLNode_BDSKExtensions.h"
@implementation BDSKScienceDirectParser
+ (NSString *)citationNodeXPath { return @"./head/meta[@name='citation_pii']";
}
-+ (NSString *)citationURLStringFromNode:(NSXMLNode *)node {
- NSString *piiValue = [node stringValueOfAttribute:@"content"];
++ (NSString *)citationURLStringFromNode:(DOMNode *)node {
+ NSString *piiValue = [(DOMElement *)node getAttribute:@"content"];
return [NSString
stringWithFormat:@"https://www.sciencedirect.com/sdfe/arp/cite?pii=%@&format=text%%2Fx-bibtex&withabstract=true",
piiValue];
}
Modified: trunk/bibdesk/BDSKSpringerParser.m
===================================================================
--- trunk/bibdesk/BDSKSpringerParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKSpringerParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -40,7 +40,7 @@
#import "BibItem.h"
#import "NSError_BDSKExtensions.h"
#import "NSArray_BDSKExtensions.h"
-#import "NSXMLNode_BDSKExtensions.h"
+#import "DOMNode_BDSKExtensions.h"
#import "BDSKBibTeXParser.h"
#import <AGRegex/AGRegex.h>
@@ -48,35 +48,21 @@
// does the rest actually still work?
// was for path = /contents/...
@interface BDSKSpringerParser (BDSKPrivate)
-+ (BibItem *)newItemFromXMLDocument:(NSXMLDocument *)xmlDocument
fromURL:(NSURL *)url error:(NSError **)outError;
-+ (NSString *)authorStringFromXMLNode:(NSXMLNode *)xmlNode
searchXPath:(NSString *)xPath;
++ (BibItem *)newItemFromDocument:(DOMDocument *)domDocument fromURL:(NSURL
*)url error:(NSError **)outError;
@end
@implementation BDSKSpringerParser
-+ (NSString *)citationNodeXPath { return @"./body//a[@data-gtmlabel='BIB']"; }
-
-+ (NSString *)authorStringFromXMLNode:(NSXMLNode *)xmlNode
searchXPath:(NSString *)xPath {
- NSError *error = nil;
- NSArray *authorNodes = [xmlNode nodesForXPath:xPath error:&error];
- NSMutableArray *authorStrings = [NSMutableArray array];
- NSXMLNode *node;
- for (node in authorNodes) {
- [authorStrings addObject:[node stringValue]];
- }
- return [authorStrings componentsJoinedByAnd];
-}
-
-+ (BibItem *)newItemFromXMLDocument:(NSXMLDocument *)xmlDocument
fromURL:(NSURL *)url error:(NSError **)outError{
++ (BibItem *)newItemFromDocument:(DOMDocument *)domDocument fromURL:(NSURL
*)url error:(NSError **)outError{
- NSXMLNode *xmlNode = [xmlDocument rootElement];
+ DOMNode *node = [domDocument documentElement];
NSMutableDictionary *pubFields = [NSMutableDictionary dictionary];
NSMutableArray *filesArray = nil;
NSString *pubType = BDSKMiscString;
// set publication type
- NSString *pubTypeGuess = [xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a/@title" addTo:nil forKey:nil];
+ NSString *pubTypeGuess = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a/@title"] firstObject] stringValue];
if (pubTypeGuess != nil) {
if ([pubTypeGuess isEqualToString:@"Link to the Book of this
Chapter"]) {
pubType = BDSKChapterString;
@@ -88,18 +74,28 @@
}
// set title
- [xmlNode searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/h1" addTo:pubFields forKey:BDSKTitleString];
- // set book or journal
+ NSString *title = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/h1"] firstObject] stringValue];
+ if (title != nil)
+ [pubFields setObject:title forKey:BDSKTitleString];
+
+ // set book or journal
if ([pubType isEqualToString:BDSKChapterString]) {
- [xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a" addTo:pubFields
forKey:BDSKBooktitleString];
+ NSString *chapter = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a"] firstObject] stringValue];
+ if (chapter != nil)
+ [pubFields setObject:chapter forKey:BDSKBooktitleString];
} else if ([pubType isEqualToString:BDSKArticleString]) {
- [xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a" addTo:pubFields
forKey:BDSKJournalString];
+ NSString *journal = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a"] firstObject] stringValue];
+ if (journal != nil)
+ [pubFields setObject:journal forKey:BDSKJournalString];
}
+
// set DOI and store for later use
- NSString *doi = [xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']//span[@class='doi']/span[@class='value']" addTo:pubFields
forKey:BDSKDoiString];
+ NSString *doi = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']//span[@class='doi']/span[@class='value']"] firstObject]
stringValue];
+ if (doi != nil)
+ [pubFields setObject:doi forKey:BDSKDoiString];
// set pages
- NSString *pages = [xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']//span[@class='pagination']" addTo:pubFields
forKey:BDSKPagesString];
+ NSString *pages = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']//span[@class='pagination']"] firstObject] stringValue];
if (pages != nil) {
AGRegex *pagesRegex = [AGRegex
regexWithPattern:@"^([0-9]*)-([0-9]*)?"];
AGRegexMatch *match = [pagesRegex findInString:pages];
@@ -112,19 +108,27 @@
[page appendString:endPage];
[pubFields setObject:page forKey:BDSKPagesString];
[page release];
+ } else {
+ [pubFields setObject:pages forKey:BDSKPagesString];
}
}
// set authors
- [pubFields setValue:[BDSKSpringerParser authorStringFromXMLNode:xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/p[@class='authors']/a"] forKey:BDSKAuthorString];
+ NSString *authors = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/p[@class='authors']/a"]
valueForKey:@"stringValue"] componentsJoinedByAnd];
+ if (authors != nil)
+ [pubFields setValue:authors forKey:BDSKAuthorString];
// set editors
- [pubFields setValue:[BDSKSpringerParser authorStringFromXMLNode:xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/p[@class='editors']/a"] forKey:BDSKEditorString];
+ NSString *editors = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/p[@class='editors']/a"]
valueForKey:@"stringValue"] componentsJoinedByAnd];
+ if (editors != nil)
+ [pubFields setValue:editors forKey:BDSKEditorString];
// set series
if ([pubType isEqualToString:BDSKChapterString]) {
- [xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='secondary']/a" addTo:pubFields
forKey:BDSKSeriesString];
+ NSString *series = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='secondary']/a"] firstObject] stringValue];
+ if (series != nil)
+ [pubFields setObject:series forKey:BDSKSeriesString];
}
// volume, number, and year
- NSString *vyString = [xmlNode
searchXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='secondary']" addTo:nil forKey:nil];
+ NSString *vyString = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='secondary']"] firstObject] stringValue];
if (vyString != nil) {
// parse volume number
AGRegex *volRegex = [AGRegex regexWithPattern:@"Volume
([0-9]*)[^0-9]"];
@@ -171,6 +175,8 @@
}
++ (NSString *)citationNodeXPath { return @"./body//a[@data-gtmlabel='BIB']"; }
+
+ (NSString *)name {return @"SpringerLink"; }
+ (NSString *)address { return @"https://link.springer.com/"; }
Modified: trunk/bibdesk/BDSKWebParser.h
===================================================================
--- trunk/bibdesk/BDSKWebParser.h 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKWebParser.h 2018-08-22 14:19:07 UTC (rev 22509)
@@ -53,7 +53,6 @@
@interface BDSKWebParser : NSObject {
DOMDocument *domDocument;
- NSXMLDocument *xmlDocument;
NSURL *URL;
id<BDSKWebParserDelegate> delegate;
BOOL finishedStarting;
@@ -71,7 +70,6 @@
#pragma mark Concrete web parser
@property (nonatomic, readonly) DOMDocument *domDocument;
-@property (nonatomic, readonly) NSXMLDocument *xmlDocument;
@property (nonatomic, readonly) NSURL *URL;
@property (nonatomic, readonly) id<BDSKWebParserDelegate> delegate;
@@ -79,7 +77,7 @@
// set at the end of -start, to know there is no more coming, so we may finish
@property (nonatomic) BOOL finishedStarting;
-- (id)initWithDocument:(DOMDocument *)aDomDocument xmlDocument:(NSXMLDocument
*)aXmlDocument fromURL:(NSURL *)aURL;
+- (id)initWithDocument:(DOMDocument *)aDomDocument fromURL:(NSURL *)aURL;
- (void)startWithDelegate:(id<BDSKWebParserDelegate>)aDelegate;
- (void)cancel;
@@ -88,7 +86,7 @@
- (BOOL)canFinishWithItems:(NSArray *)items success:(BOOL *)success;
// this must be implemented by subclasses
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url;
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url;
// main method for subclasses to implement
- (NSArray *)itemsReturningError:(NSError **)outError;
Modified: trunk/bibdesk/BDSKWebParser.m
===================================================================
--- trunk/bibdesk/BDSKWebParser.m 2018-08-22 06:30:25 UTC (rev 22508)
+++ trunk/bibdesk/BDSKWebParser.m 2018-08-22 14:19:07 UTC (rev 22509)
@@ -65,7 +65,7 @@
@implementation BDSKWebParser
-@synthesize domDocument, xmlDocument, URL, delegate, finishedStarting;
+@synthesize domDocument, URL, delegate, finishedStarting;
+ (NSArray *)parsers {
static NSArray *webParsers = nil;
@@ -98,6 +98,7 @@
// entry point for web group
+ (BDSKWebParser *)parserForDocument:(DOMDocument *)domDocument fromURL:(NSURL
*)url error:(NSError **)outError{
+ /*
NSError *error = nil;
NSString *htmlString = [(id)[domDocument documentElement] outerHTML];
@@ -118,10 +119,11 @@
if(outError) *outError = error;
return nil;
}
+ */
Class parserClass = Nil;
for (parserClass in [self parsers]) {
- if ([parserClass canParseDocument:domDocument xmlDocument:xmlDoc
fromURL:url])
+ if ([parserClass canParseDocument:domDocument fromURL:url])
break;
}
@@ -136,7 +138,7 @@
BDSKASSERT([parserClass isSubclassOfClass:[BDSKWebParser class]]);
- return [[[parserClass alloc] initWithDocument:domDocument
xmlDocument:xmlDoc fromURL:url] autorelease];
+ return [[[parserClass alloc] initWithDocument:domDocument fromURL:url]
autorelease];
}
+ (NSArray *)parsersForFeature:(BDSKParserFeature)feature {
@@ -156,8 +158,8 @@
return nil;
}
-- (id)initWithDocument:(DOMDocument *)aDomDocument xmlDocument:(NSXMLDocument
*)aXmlDocument fromURL:(NSURL *)aURL {
- if (aDomDocument == nil || aXmlDocument == nil || aURL == nil) {
+- (id)initWithDocument:(DOMDocument *)aDomDocument fromURL:(NSURL *)aURL {
+ if (aDomDocument == nil || aURL == nil) {
[self release];
return nil;
}
@@ -164,7 +166,6 @@
self = [super init];
if (self) {
domDocument = [aDomDocument retain];
- xmlDocument = [aXmlDocument retain];
URL = [aURL retain];
}
return self;
@@ -174,7 +175,6 @@
- (void)dealloc {
delegate = nil;
BDSKDESTROY(domDocument);
- BDSKDESTROY(xmlDocument);
BDSKDESTROY(URL);
[super dealloc];
}
@@ -215,7 +215,7 @@
- (NSArray *)itemsReturningError:(NSError **)outError { return nil; }
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url { return NO; }
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
return NO; }
+ (NSString *)name {
NSString *name = NSStringFromClass(self);
Modified: trunk/bibdesk/BDSKZentralblattParser.m
===================================================================
--- trunk/bibdesk/BDSKZentralblattParser.m 2018-08-22 06:30:25 UTC (rev
22508)
+++ trunk/bibdesk/BDSKZentralblattParser.m 2018-08-22 14:19:07 UTC (rev
22509)
@@ -50,7 +50,7 @@
// Zentralblatt Math is mirrored across several servers. See
http://www.zentralblatt-math.org/zmath/en/mirrors/ .
// Accept URLs whose path begins with zmath. As sometimes paths begin with
multiple slashes, trim those first.
-+ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument
*)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
if ([url hasFirstPathComponent:@"zmath"] == NO)
return NO;
@@ -57,7 +57,7 @@
AGRegex *ZMathRegexp = [AGRegex regexWithPattern:@"(Zbl|JFM)
(pre)?([0-9.]*)" options:AGRegexMultiline];
- return nil != [ZMathRegexp findInString:[xmlDocument XMLString]];
+ return nil != [ZMathRegexp findInString:[[domDocument documentElement]
innerHTML]];
}
// Find occurrences of strings Zbl [pre]1234.56789 or JFM 12.3456.78 on the
page.
@@ -66,7 +66,7 @@
- (NSArray *)itemsReturningError:(NSError **)outError {
AGRegex *ZMathRegexp = [AGRegex regexWithPattern:@"(Zbl|JFM)
(pre)?([0-9.]*)" options:AGRegexMultiline];
- NSArray * regexpResults = [ZMathRegexp findAllInString:[[self
xmlDocument] XMLString]];
+ NSArray * regexpResults = [ZMathRegexp findAllInString:[[[self
domDocument] documentElement] innerHTML]];
NSArray * requests = nil;
@@ -113,7 +113,7 @@
// If the referring URL's path begins with '/zmath', assume we were already using a
Zentralblatt mirror server and continue using that.
// If not, use the default server instead.
NSString * serverName = [[referrer host] lowercaseString];
- if ( [BDSKZentralblattParser canParseDocument:nil xmlDocument:nil
fromURL:referrer] ) {
+ if ( [BDSKZentralblattParser canParseDocument:nil fromURL:referrer] ) {
if ( [[referrer path] rangeOfString:@"/zmath/ZMATH"].location !=
NSNotFound ) {
// some mirrors' paths begin with /ZMATH, add that
serverName = [serverName stringByAppendingString:@"/ZMATH"];
Modified: trunk/bibdesk/DOMNode_BDSKExtensions.h
===================================================================
--- trunk/bibdesk/DOMNode_BDSKExtensions.h 2018-08-22 06:30:25 UTC (rev
22508)
+++ trunk/bibdesk/DOMNode_BDSKExtensions.h 2018-08-22 14:19:07 UTC (rev
22509)
@@ -42,14 +42,11 @@
@interface DOMNode (BDSKExtensions)
- (NSArray *)nodesForXPath:(NSString *)xpath;
+
- (NSString *)stringValue;
-- (NSString *)stringValueOfAttribute:(NSString *)attrName;
+- (NSString *)stringValuePreservingBreaks;
+- (NSString *)fullStringValueIfABBR;
- (NSArray *)descendantOrSelfNodesWithClassName:(NSString *)className;
-- (BOOL)hasParentWithClassName:(NSString *)class;
-- (NSArray *)classNames;
-- (NSString *)fullStringValueIfABBR;
-- (NSString *)searchXPath:(NSString *)searchPath addTo:(NSMutableDictionary
*)dict forKey:(NSString *)key;
-- (NSString *)searchXPath:(NSString *)searchPath addTo:(NSMutableDictionary
*)dict forKey:(NSString *)key last:(BOOL)last;
-- (NSString *)textStringValue;
+- (BOOL)hasParentWithClassName:(NSString *)className;
@end
Modified: trunk/bibdesk/DOMNode_BDSKExtensions.m
===================================================================
--- trunk/bibdesk/DOMNode_BDSKExtensions.m 2018-08-22 06:30:25 UTC (rev
22508)
+++ trunk/bibdesk/DOMNode_BDSKExtensions.m 2018-08-22 14:19:07 UTC (rev
22509)
@@ -43,135 +43,74 @@
- (NSArray *)nodesForXPath:(NSString *)xpath {
DOMXPathResult *result = [[self ownerDocument] evaluate:xpath
contextNode:self resolver:nil type:DOM_ANY_TYPE inResult:nil];
- DOMNode *node;
+ DOMNode *node = [result iterateNext];
NSMutableArray *nodes = nil;
- while ((node = [result iterateNext])) {
- if (nodes == nil)
- nodes = [NSMutableArray array];
- [nodes addObject:node];
+ if (node) {
+ nodes = [NSMutableArray array];
+ do {
+ [nodes addObject:node];
+ } while ((node = [result iterateNext]));
}
return nodes;
}
+// DOM keeps all spaces and newlines as in the html, rather than as they are
interpreted
- (NSString *)stringValue {
- return [self textContent];
+ return [[self textContent]
stringByCollapsingWhitespaceAndNewlinesAndRemovingSurroundingWhitespaceAndNewlines]
?: @"";
}
-- (NSString *)stringValueOfAttribute:(NSString *)attrName {
- NSString *path = [NSString stringWithFormat:@"./@%@", attrName];
- NSArray *atts = [self nodesForXPath:path];
- if ([atts count] == 0) return nil;
- return [[atts objectAtIndex:0] textContent];
-}
-
-- (NSArray *)descendantOrSelfNodesWithClassName:(NSString *)className {
- NSString *path = [NSString stringWithFormat:@".//*[contains(concat(' ',
normalize-space(@class), ' '), ' %@ ')]", className];
- NSArray *ar = [self nodesForXPath:path];
- return ar;
-}
-
-- (BOOL)hasParentWithClassName:(NSString *)className {
+- (NSString *)stringValuePreservingBreaks {
+ DOMNodeList *children = [self childNodes];
+ NSUInteger i, iMax = [children length];
- DOMNode *parent = [self parentNode];
+ if (iMax == 0)
+ return [self stringValue];
- do {
- if ([parent nodeType] != DOM_ELEMENT_NODE) return NO; // handles root
node
-
- if ([[parent classNames] containsObject:className])
- return YES;
-
- } while ((parent = [parent parentNode]));
+ NSMutableString *string = [NSMutableString string];
- return NO;
+ for (i = 0; i < iMax; i++) {
+ DOMNode *node = [children item:i];
+ short type = [node nodeType];
+ if (type != DOM_ELEMENT_NODE && type != DOM_TEXT_NODE) continue;
+ if (type == DOM_ELEMENT_NODE && [[node nodeName]
isCaseInsensitiveEqual:@"br"]) {
+ [string appendString:@"\n"];
+ } else {
+ NSString *s = [node stringValuePreservingBreaks];
+ if ([s length]) {
+ if ([string length] && [string lastCharacter] != '\n' &&
[string firstCharacter] != '\n')
+ [string appendString:@" "];
+ [string appendString:s];
+ }
+ }
+ }
+ return string;
}
-- (NSArray *)classNames {
-
- if([self nodeType] != DOM_ELEMENT_NODE) [NSException
raise:NSInvalidArgumentException format:@"wrong node kind"];
-
- NSMutableArray *array = [NSMutableArray arrayWithCapacity:0];
- NSError *err = nil;
- NSArray *classNodes = [self nodesForXPath:@"@class"];
-
- if ([classNodes count] == 0)
- return array;
-
- NSAssert ([classNodes count] == 1, @"too many nodes in classNodes");
-
- NSXMLNode *classNode = [classNodes objectAtIndex:0];
-
- [array addObjectsFromArray:[[classNode stringValue]
componentsSeparatedByString:@" "]];
-
- return array;
-}
-
- (NSString *)fullStringValueIfABBR {
- NSError *err;
- if([self nodeType] != DOM_ELEMENT_NODE) [NSException
raise:NSInvalidArgumentException format:@"wrong node kind"];
-
- if ([[[self nodeName] lowercaseString] isEqualToString:@"abbr"]){
+ if ([[self nodeName] isCaseInsensitiveEqual:@"abbr"]){
//todo: will need more robust comparison for namespaced node titles.
-
// return value of title attribute instead
- NSArray *titleNodes = [self nodesForXPath:@"@title"];
- if ([titleNodes count] > 0)
- return [[titleNodes firstObject] stringValue];
+ NSString *title = [(DOMElement *)self getAttribute:@"title"];
+ if (title)
+ return title;
}
-
return [self stringValue];
}
-- (NSString *)searchXPath:(NSString *)searchPath addTo:(NSMutableDictionary
*)dict forKey:(NSString *)key {
- return [self searchXPath:searchPath addTo:dict forKey:key last:NO];
+- (NSArray *)descendantOrSelfNodesWithClassName:(NSString *)className {
+ return [self nodesForXPath:[NSString
stringWithFormat:@".//*[contains(concat(' ',normalize-space(@class),' '),' %@
')]", className]];
}
-- (NSString *)searchXPath:(NSString *)searchPath addTo:(NSMutableDictionary
*)dict forKey:(NSString *)key last:(BOOL)last {
- NSArray *nodes = [self nodesForXPath:searchPath];
- NSString *string = nil;
-
- if (nil != nodes && 0 < [nodes count]) {
- string = [[nodes objectAtIndex:last ? ([nodes count] - 1) : 0]
stringValue];
- if (string) {
- string = [string stringByRemovingSurroundingWhitespaceAndNewlines];
- [dict setValue:string forKey:key];
- }
- }
- return string;
+- (BOOL)hasParentWithClassName:(NSString *)className {
+ DOMNode *parent = [self parentNode];
+ do {
+ if ([parent nodeType] != DOM_ELEMENT_NODE) return NO; // handles root
node
+ if ([[[(DOMElement *)parent getAttribute:@"class"]
componentsSeparatedByString:@" "] containsObject:className])
+ return YES;
+ } while ((parent = [parent parentNode]));
+ return NO;
}
-static void appendTextNodes(NSMutableString *string, DOMNode *node) {
- if ([node nodeType] == DOM_TEXT_NODE)
- [string appendString:[node stringValue]];
- else if ([[[node nodeName] lowercaseString] isEqualToString:@"script"] ==
NO) {
- DOMNodeList *children = [node childNodes];
- unsigned i, iMax = [children length];
- for (i = 0; i < iMax; i++)
- appendTextNodes(string, [children item:i]);
- }
-}
-
-- (NSString *)textStringValue {
- NSMutableString *text = [NSMutableString string];
- appendTextNodes(text, self);
- return text;
-}
-
@end
-
-
-@interface DOMDocument (BDSKExtensions)
-@end
-
-@implementation DOMDocument (BDSKExtensions)
-
-- (NSString *)textStringValue {
- NSError *error;
- NSArray *body = [self nodesForXPath:@"./html/body"];
- if ([body count] == 1)
- return [[body firstObject] textStringValue];
- return [[self documentElement] textStringValue];
-}
-
-@end
Modified: trunk/bibdesk/NSString_BDSKExtensions.h
===================================================================
--- trunk/bibdesk/NSString_BDSKExtensions.h 2018-08-22 06:30:25 UTC (rev
22508)
+++ trunk/bibdesk/NSString_BDSKExtensions.h 2018-08-22 14:19:07 UTC (rev
22509)
@@ -463,7 +463,7 @@
- (NSString *)stringByRemovingSurroundingWhitespace;
- (NSString *)stringByCollapsingWhitespaceAndRemovingSurroundingWhitespace;
- (NSString *)stringByRemovingSurroundingWhitespaceAndNewlines;
-
+- (NSString
*)stringByCollapsingWhitespaceAndNewlinesAndRemovingSurroundingWhitespaceAndNewlines;
- (NSString *)fullyEncodeAsIURI;
- (NSString *)stringByRemovingAliens;
Modified: trunk/bibdesk/NSString_BDSKExtensions.m
===================================================================
--- trunk/bibdesk/NSString_BDSKExtensions.m 2018-08-22 06:30:25 UTC (rev
22508)
+++ trunk/bibdesk/NSString_BDSKExtensions.m 2018-08-22 14:19:07 UTC (rev
22509)
@@ -1340,6 +1340,10 @@
return [self stringByCollapsingAndTrimmingCharactersInSet:[NSCharacterSet
whitespaceCharacterSet]];
}
+- (NSString
*)stringByCollapsingWhitespaceAndNewlinesAndRemovingSurroundingWhitespaceAndNewlines
{
+ return [self stringByCollapsingAndTrimmingCharactersInSet:[NSCharacterSet
whitespaceAndNewlineCharacterSet]];
+}
+
// This method is copied and modified from NSString-OFStringExtensions.m
- (NSString *)fullyEncodeAsIURI {
static const char hexDigits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit