Revision: 22501
http://sourceforge.net/p/bibdesk/svn/22501
Author: hofman
Date: 2018-08-18 17:20:48 +0000 (Sat, 18 Aug 2018)
Log Message:
-----------
Bring back the domDocument ivar in the web parser. It is as yet unused, but we
probably want to replace the xmlDocument with the domDocument, as the NSXML library is very buggy.
Modified Paths:
--------------
trunk/bibdesk/BDSKArxivParser.m
trunk/bibdesk/BDSKAsynchronousWebParser.m
trunk/bibdesk/BDSKBibTeXWebParser.m
trunk/bibdesk/BDSKCOinSParser.m
trunk/bibdesk/BDSKDOIWebParser.m
trunk/bibdesk/BDSKGoogleScholarParser.m
trunk/bibdesk/BDSKHCiteParser.m
trunk/bibdesk/BDSKHubmedParser.m
trunk/bibdesk/BDSKIACRParser.m
trunk/bibdesk/BDSKIEEEXploreParser.m
trunk/bibdesk/BDSKInspireParser.m
trunk/bibdesk/BDSKJSTORWebParser.m
trunk/bibdesk/BDSKMathSciNetParser.m
trunk/bibdesk/BDSKNumdamParser.m
trunk/bibdesk/BDSKProjectEuclidParser.m
trunk/bibdesk/BDSKWebParser.h
trunk/bibdesk/BDSKWebParser.m
trunk/bibdesk/BDSKZentralblattParser.m
Modified: trunk/bibdesk/BDSKArxivParser.m
===================================================================
--- trunk/bibdesk/BDSKArxivParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKArxivParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -46,7 +46,7 @@
@implementation BDSKArxivParser
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
// !!! other countries end up with e.g. fr.arxiv.org; checking for
scholar.arxiv.com may fail in those cases
if ([url hasDomain:@"arxiv.org"] == NO)
Modified: trunk/bibdesk/BDSKAsynchronousWebParser.m
===================================================================
--- trunk/bibdesk/BDSKAsynchronousWebParser.m 2018-08-18 06:30:27 UTC (rev
22500)
+++ trunk/bibdesk/BDSKAsynchronousWebParser.m 2018-08-18 17:20:48 UTC (rev
22501)
@@ -62,7 +62,7 @@
return [self finishedStarting] && [downloads count] == 0;
}
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
NSString *host = [[[NSURL URLWithString:[self address]] host]
lowercaseString];
if ([url hasDomain:host] == NO)
Modified: trunk/bibdesk/BDSKBibTeXWebParser.m
===================================================================
--- trunk/bibdesk/BDSKBibTeXWebParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKBibTeXWebParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -46,7 +46,7 @@
@implementation BDSKBibTeXWebParser
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
NSString *text = [xmlDocument textStringValue];
AGRegex *bibtexRegex = [AGRegex regexWithPattern:@"@[[:alpha:]]+[
\\t]*[{(]"];
Modified: trunk/bibdesk/BDSKCOinSParser.m
===================================================================
--- trunk/bibdesk/BDSKCOinSParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKCOinSParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -66,7 +66,7 @@
// Claim that the parser can parse the document if its markup contains the string
Z3988.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
NSError *error;
NSArray *nodes = [[xmlDocument rootElement]
nodesForXPath:@"./body//span[@class='Z3988']" error:&error];
Modified: trunk/bibdesk/BDSKDOIWebParser.m
===================================================================
--- trunk/bibdesk/BDSKDOIWebParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKDOIWebParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -45,7 +45,7 @@
@implementation BDSKDOIWebParser
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
NSXMLNode *rootElement = [xmlDocument rootElement];
NSError *error;
NSString *doiXPath;
Modified: trunk/bibdesk/BDSKGoogleScholarParser.m
===================================================================
--- trunk/bibdesk/BDSKGoogleScholarParser.m 2018-08-18 06:30:27 UTC (rev
22500)
+++ trunk/bibdesk/BDSKGoogleScholarParser.m 2018-08-18 17:20:48 UTC (rev
22501)
@@ -43,7 +43,7 @@
+ (NSString *)citationNodeXPath { return
@"./body//a[contains(text(),'BibTeX')]"; }
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
// !!! other countries end up with e.g. scholar.google.be; checking for
scholar.google.com may fail in those cases
// also some sites access google scholar via an ezproxy, so the suffix
could be quite complex
if (nil == [url host] || NO == [[[url host] lowercaseString]
hasPrefix:@"scholar.google."] ||
Modified: trunk/bibdesk/BDSKHCiteParser.m
===================================================================
--- trunk/bibdesk/BDSKHCiteParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKHCiteParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -52,7 +52,7 @@
@implementation BDSKHCiteParser
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
NSError *error = nil;
return [[[xmlDocument rootElement]
descendantOrSelfNodesWithClassName:@"hcite" error:&error] count] > 0;
Modified: trunk/bibdesk/BDSKHubmedParser.m
===================================================================
--- trunk/bibdesk/BDSKHubmedParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKHubmedParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -44,7 +44,7 @@
@implementation BDSKHubmedParser
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
if ([url host] == nil || [[url host]
isCaseInsensitiveEqual:@"www.hubmed.org"] == NO ||
[[url path] isCaseInsensitiveEqual:@"/display.cgi"] == NO){
Modified: trunk/bibdesk/BDSKIACRParser.m
===================================================================
--- trunk/bibdesk/BDSKIACRParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKIACRParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -45,7 +45,7 @@
@implementation BDSKIACRParser
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
if ([url hasDomain:@"eprint.iacr.org"] == NO)
return NO;
Modified: trunk/bibdesk/BDSKIEEEXploreParser.m
===================================================================
--- trunk/bibdesk/BDSKIEEEXploreParser.m 2018-08-18 06:30:27 UTC (rev
22500)
+++ trunk/bibdesk/BDSKIEEEXploreParser.m 2018-08-18 17:20:48 UTC (rev
22501)
@@ -51,7 +51,7 @@
@implementation BDSKIEEEXploreParser
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
if (nil == [url host] || [[url host]
isCaseInsensitiveEqual:@"ieeexplore.ieee.org"] == NO)
return NO;
Modified: trunk/bibdesk/BDSKInspireParser.m
===================================================================
--- trunk/bibdesk/BDSKInspireParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKInspireParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -60,7 +60,7 @@
return bibtexString;
}
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
if ([url host] == nil || [[[url host] lowercaseString]
isEqualToString:@"inspirehep.net"] == NO)
return NO;
Modified: trunk/bibdesk/BDSKJSTORWebParser.m
===================================================================
--- trunk/bibdesk/BDSKJSTORWebParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKJSTORWebParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -44,7 +44,7 @@
@implementation BDSKJSTORWebParser
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
if ([url hasDomain:@"jstor.org"] == NO)
return NO;
Modified: trunk/bibdesk/BDSKMathSciNetParser.m
===================================================================
--- trunk/bibdesk/BDSKMathSciNetParser.m 2018-08-18 06:30:27 UTC (rev
22500)
+++ trunk/bibdesk/BDSKMathSciNetParser.m 2018-08-18 17:20:48 UTC (rev
22501)
@@ -50,7 +50,7 @@
// MathSciNet is mirrored across different servers, don't use the server name
to recognise the URL.
// Instead recognise all URLs beginning with 'mathscinet', to match both
general MathSciNet URLs like <https://www.ams.org/mathscinet/...> and
MathSciNet reference URLs <https://www.ams.org/mathscinet-getitem?...>.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
if ([url hasFirstPathComponent:@"mathscinet"] == NO)
return NO;
Modified: trunk/bibdesk/BDSKNumdamParser.m
===================================================================
--- trunk/bibdesk/BDSKNumdamParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKNumdamParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -47,7 +47,7 @@
@implementation BDSKNumdamParser
// Recognise Numdam pages by their server name ending in numdam.org.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
if ([url hasDomain:@"numdam.org"] == NO)
return NO;
Modified: trunk/bibdesk/BDSKProjectEuclidParser.m
===================================================================
--- trunk/bibdesk/BDSKProjectEuclidParser.m 2018-08-18 06:30:27 UTC (rev
22500)
+++ trunk/bibdesk/BDSKProjectEuclidParser.m 2018-08-18 17:20:48 UTC (rev
22501)
@@ -47,7 +47,7 @@
@implementation BDSKProjectEuclidParser
// Recognise Project Euclid pages by their server name ending in
projecteuclid.org.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
if ([url hasDomain:@"projecteuclid.org"] == NO)
return NO;
Modified: trunk/bibdesk/BDSKWebParser.h
===================================================================
--- trunk/bibdesk/BDSKWebParser.h 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKWebParser.h 2018-08-18 17:20:48 UTC (rev 22501)
@@ -52,6 +52,7 @@
@protocol BDSKWebParserDelegate;
@interface BDSKWebParser : NSObject {
+ DOMDocument *domDocument;
NSXMLDocument *xmlDocument;
NSURL *URL;
id<BDSKWebParserDelegate> delegate;
@@ -69,6 +70,7 @@
#pragma mark Concrete web parser
+@property (nonatomic, readonly) DOMDocument *domDocument;
@property (nonatomic, readonly) NSXMLDocument *xmlDocument;
@property (nonatomic, readonly) NSURL *URL;
@@ -77,7 +79,7 @@
// set at the end of -start, to know there is no more coming, so we may finish
@property (nonatomic) BOOL finishedStarting;
-- (id)initWithDocument:(NSXMLDocument *)aXmlDocument fromURL:(NSURL *)aURL;
+- (id)initWithDocument:(DOMDocument *)aDomDocument xmlDocument:(NSXMLDocument
*)aXmlDocument fromURL:(NSURL *)aURL;
- (void)startWithDelegate:(id<BDSKWebParserDelegate>)aDelegate;
- (void)cancel;
@@ -86,7 +88,7 @@
- (BOOL)canFinishWithItems:(NSArray *)items success:(BOOL *)success;
// this must be implemented by subclasses
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url;
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url;
// main method for subclasses to implement
- (NSArray *)itemsReturningError:(NSError **)outError;
Modified: trunk/bibdesk/BDSKWebParser.m
===================================================================
--- trunk/bibdesk/BDSKWebParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKWebParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -64,7 +64,7 @@
@implementation BDSKWebParser
-@synthesize xmlDocument, URL, delegate, finishedStarting;
+@synthesize domDocument, xmlDocument, URL, delegate, finishedStarting;
+ (NSArray *)parsers {
static NSArray *webParsers = nil;
@@ -120,7 +120,7 @@
Class parserClass = Nil;
for (parserClass in [self parsers]) {
- if ([parserClass canParseDocument:xmlDoc fromURL:url])
+ if ([parserClass canParseDocument:domDocument xmlDocument:xmlDoc
fromURL:url])
break;
}
@@ -135,7 +135,7 @@
BDSKASSERT([parserClass isSubclassOfClass:[BDSKWebParser class]]);
- return [[[parserClass alloc] initWithDocument:xmlDoc fromURL:url]
autorelease];
+ return [[[parserClass alloc] initWithDocument:domDocument
xmlDocument:xmlDoc fromURL:url] autorelease];
}
+ (NSArray *)parsersForFeature:(BDSKParserFeature)feature {
@@ -155,13 +155,14 @@
return nil;
}
-- (id)initWithDocument:(NSXMLDocument *)aXmlDocument fromURL:(NSURL *)aURL {
- if (aXmlDocument == nil || aURL == nil) {
+- (id)initWithDocument:(DOMDocument *)aDomDocument xmlDocument:(NSXMLDocument
*)aXmlDocument fromURL:(NSURL *)aURL {
+ if (aDomDocument == nil || aXmlDocument == nil || aURL == nil) {
[self release];
return nil;
}
self = [super init];
if (self) {
+ domDocument = [aDomDocument retain];
xmlDocument = [aXmlDocument retain];
URL = [aURL retain];
}
@@ -171,6 +172,7 @@
- (void)dealloc {
delegate = nil;
+ BDSKDESTROY(domDocument);
BDSKDESTROY(xmlDocument);
BDSKDESTROY(URL);
[super dealloc];
@@ -212,7 +214,7 @@
- (NSArray *)itemsReturningError:(NSError **)outError { return nil; }
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
return NO; }
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url { return NO; }
+ (NSString *)name {
NSString *name = NSStringFromClass(self);
Modified: trunk/bibdesk/BDSKZentralblattParser.m
===================================================================
--- trunk/bibdesk/BDSKZentralblattParser.m 2018-08-18 06:30:27 UTC (rev
22500)
+++ trunk/bibdesk/BDSKZentralblattParser.m 2018-08-18 17:20:48 UTC (rev
22501)
@@ -50,7 +50,7 @@
// Zentralblatt Math is mirrored across several servers. See
http://www.zentralblatt-math.org/zmath/en/mirrors/ .
// Accept URLs whose path begins with zmath. As sometimes paths begin with
multiple slashes, trim those first.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
if ([url hasFirstPathComponent:@"zmath"] == NO)
return NO;
@@ -113,7 +113,7 @@
// If the referring URL's path begins with '/zmath', assume we were using a
Zentralblatt mirror server before and continue using that.
// If not, use the default server instead.
NSString * serverName = [[referrer host] lowercaseString];
- if ( [BDSKZentralblattParser canParseDocument:nil fromURL:referrer] ) {
+ if ( [BDSKZentralblattParser canParseDocument:nil xmlDocument:nil
fromURL:referrer] ) {
if ( [[referrer path] rangeOfString:@"/zmath/ZMATH"].location !=
NSNotFound ) {
// some mirrors' paths begin with /ZMATH, add that
serverName = [serverName stringByAppendingString:@"/ZMATH"];
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit