Revision: 22501
          http://sourceforge.net/p/bibdesk/svn/22501
Author:   hofman
Date:     2018-08-18 17:20:48 +0000 (Sat, 18 Aug 2018)
Log Message:
-----------
Bring back domDocument ivar in web parser. As yet unused, but we probably want 
to replace the xmlDocument with domDocument, as the NSXML library is very buggy.

Modified Paths:
--------------
    trunk/bibdesk/BDSKArxivParser.m
    trunk/bibdesk/BDSKAsynchronousWebParser.m
    trunk/bibdesk/BDSKBibTeXWebParser.m
    trunk/bibdesk/BDSKCOinSParser.m
    trunk/bibdesk/BDSKDOIWebParser.m
    trunk/bibdesk/BDSKGoogleScholarParser.m
    trunk/bibdesk/BDSKHCiteParser.m
    trunk/bibdesk/BDSKHubmedParser.m
    trunk/bibdesk/BDSKIACRParser.m
    trunk/bibdesk/BDSKIEEEXploreParser.m
    trunk/bibdesk/BDSKInspireParser.m
    trunk/bibdesk/BDSKJSTORWebParser.m
    trunk/bibdesk/BDSKMathSciNetParser.m
    trunk/bibdesk/BDSKNumdamParser.m
    trunk/bibdesk/BDSKProjectEuclidParser.m
    trunk/bibdesk/BDSKWebParser.h
    trunk/bibdesk/BDSKWebParser.m
    trunk/bibdesk/BDSKZentralblattParser.m

Modified: trunk/bibdesk/BDSKArxivParser.m
===================================================================
--- trunk/bibdesk/BDSKArxivParser.m     2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKArxivParser.m     2018-08-18 17:20:48 UTC (rev 22501)
@@ -46,7 +46,7 @@
 
 @implementation BDSKArxivParser
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     // !!! other countries end up with e.g. fr.arxiv.org; checking for 
scholar.arxiv.com may fail in those cases
     if ([url hasDomain:@"arxiv.org"] == NO)

Modified: trunk/bibdesk/BDSKAsynchronousWebParser.m
===================================================================
--- trunk/bibdesk/BDSKAsynchronousWebParser.m   2018-08-18 06:30:27 UTC (rev 
22500)
+++ trunk/bibdesk/BDSKAsynchronousWebParser.m   2018-08-18 17:20:48 UTC (rev 
22501)
@@ -62,7 +62,7 @@
     return [self finishedStarting] && [downloads count] == 0;
 }
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
     NSString *host = [[[NSURL URLWithString:[self address]] host] 
lowercaseString];
     
     if ([url hasDomain:host] == NO)

Modified: trunk/bibdesk/BDSKBibTeXWebParser.m
===================================================================
--- trunk/bibdesk/BDSKBibTeXWebParser.m 2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKBibTeXWebParser.m 2018-08-18 17:20:48 UTC (rev 22501)
@@ -46,7 +46,7 @@
 
 @implementation BDSKBibTeXWebParser
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     NSString *text = [xmlDocument textStringValue];
        AGRegex *bibtexRegex = [AGRegex regexWithPattern:@"@[[:alpha:]]+[ 
\\t]*[{(]"];

Modified: trunk/bibdesk/BDSKCOinSParser.m
===================================================================
--- trunk/bibdesk/BDSKCOinSParser.m     2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKCOinSParser.m     2018-08-18 17:20:48 UTC (rev 22501)
@@ -66,7 +66,7 @@
 
 
 // Claim that the parser can parse the document if its markup contains the string 
Z3988.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
        
     NSError *error;
     NSArray *nodes = [[xmlDocument rootElement] 
nodesForXPath:@"./body//span[@class='Z3988']" error:&error];

Modified: trunk/bibdesk/BDSKDOIWebParser.m
===================================================================
--- trunk/bibdesk/BDSKDOIWebParser.m    2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKDOIWebParser.m    2018-08-18 17:20:48 UTC (rev 22501)
@@ -45,7 +45,7 @@
 
 @implementation BDSKDOIWebParser
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
     NSXMLNode *rootElement = [xmlDocument rootElement];
     NSError *error;
     NSString *doiXPath;

Modified: trunk/bibdesk/BDSKGoogleScholarParser.m
===================================================================
--- trunk/bibdesk/BDSKGoogleScholarParser.m     2018-08-18 06:30:27 UTC (rev 
22500)
+++ trunk/bibdesk/BDSKGoogleScholarParser.m     2018-08-18 17:20:48 UTC (rev 
22501)
@@ -43,7 +43,7 @@
 
 + (NSString *)citationNodeXPath { return 
@"./body//a[contains(text(),'BibTeX')]"; }
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     // !!! other countries end up with e.g. scholar.google.be; checking for 
scholar.google.com may fail in those cases
     // also some sites access google scholar via an ezproxy, so the suffix 
could be quite complex
     if (nil == [url host] || NO == [[[url host] lowercaseString] 
hasPrefix:@"scholar.google."] || 

Modified: trunk/bibdesk/BDSKHCiteParser.m
===================================================================
--- trunk/bibdesk/BDSKHCiteParser.m     2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKHCiteParser.m     2018-08-18 17:20:48 UTC (rev 22501)
@@ -52,7 +52,7 @@
 
 @implementation BDSKHCiteParser
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     NSError *error = nil;
     return [[[xmlDocument rootElement] 
descendantOrSelfNodesWithClassName:@"hcite" error:&error] count] > 0;

Modified: trunk/bibdesk/BDSKHubmedParser.m
===================================================================
--- trunk/bibdesk/BDSKHubmedParser.m    2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKHubmedParser.m    2018-08-18 17:20:48 UTC (rev 22501)
@@ -44,7 +44,7 @@
 
 @implementation BDSKHubmedParser
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     if ([url host] == nil || [[url host] 
isCaseInsensitiveEqual:@"www.hubmed.org"] == NO || 
         [[url path] isCaseInsensitiveEqual:@"/display.cgi"] == NO){

Modified: trunk/bibdesk/BDSKIACRParser.m
===================================================================
--- trunk/bibdesk/BDSKIACRParser.m      2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKIACRParser.m      2018-08-18 17:20:48 UTC (rev 22501)
@@ -45,7 +45,7 @@
 
 @implementation BDSKIACRParser
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     if ([url hasDomain:@"eprint.iacr.org"] == NO)
         return NO;

Modified: trunk/bibdesk/BDSKIEEEXploreParser.m
===================================================================
--- trunk/bibdesk/BDSKIEEEXploreParser.m        2018-08-18 06:30:27 UTC (rev 
22500)
+++ trunk/bibdesk/BDSKIEEEXploreParser.m        2018-08-18 17:20:48 UTC (rev 
22501)
@@ -51,7 +51,7 @@
 
 @implementation BDSKIEEEXploreParser
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     if (nil == [url host] || [[url host] 
isCaseInsensitiveEqual:@"ieeexplore.ieee.org"] == NO)
         return NO;

Modified: trunk/bibdesk/BDSKInspireParser.m
===================================================================
--- trunk/bibdesk/BDSKInspireParser.m   2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKInspireParser.m   2018-08-18 17:20:48 UTC (rev 22501)
@@ -60,7 +60,7 @@
     return bibtexString;
 }
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url {
     if ([url host] == nil || [[[url host] lowercaseString] 
isEqualToString:@"inspirehep.net"] == NO)
         return NO;
     

Modified: trunk/bibdesk/BDSKJSTORWebParser.m
===================================================================
--- trunk/bibdesk/BDSKJSTORWebParser.m  2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKJSTORWebParser.m  2018-08-18 17:20:48 UTC (rev 22501)
@@ -44,7 +44,7 @@
 
 @implementation BDSKJSTORWebParser
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     if ([url hasDomain:@"jstor.org"] == NO)
         return NO;

Modified: trunk/bibdesk/BDSKMathSciNetParser.m
===================================================================
--- trunk/bibdesk/BDSKMathSciNetParser.m        2018-08-18 06:30:27 UTC (rev 
22500)
+++ trunk/bibdesk/BDSKMathSciNetParser.m        2018-08-18 17:20:48 UTC (rev 
22501)
@@ -50,7 +50,7 @@
 
 // MathSciNet is mirrored across different servers, don't use the server name 
to recognise the URL.
 // Instead recognise all URLs beginning with 'mathscinet', to match both 
general MathSciNet URLs like <https://www.ams.org/mathscinet/...> and 
MathSciNet reference URLs <https://www.ams.org/mathscinet-getitem?...>.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     if ([url hasFirstPathComponent:@"mathscinet"] == NO)
         return NO;

Modified: trunk/bibdesk/BDSKNumdamParser.m
===================================================================
--- trunk/bibdesk/BDSKNumdamParser.m    2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKNumdamParser.m    2018-08-18 17:20:48 UTC (rev 22501)
@@ -47,7 +47,7 @@
 @implementation BDSKNumdamParser
 
 // Recognise Numdam pages by their server name ending in numdam.org.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     if ([url hasDomain:@"numdam.org"] == NO)
         return NO;

Modified: trunk/bibdesk/BDSKProjectEuclidParser.m
===================================================================
--- trunk/bibdesk/BDSKProjectEuclidParser.m     2018-08-18 06:30:27 UTC (rev 
22500)
+++ trunk/bibdesk/BDSKProjectEuclidParser.m     2018-08-18 17:20:48 UTC (rev 
22501)
@@ -47,7 +47,7 @@
 @implementation BDSKProjectEuclidParser
 
 // Recognise Project Euclid pages by their server name ending in 
projecteuclid.org.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
        
     if ([url hasDomain:@"projecteuclid.org"] == NO)
         return NO;

Modified: trunk/bibdesk/BDSKWebParser.h
===================================================================
--- trunk/bibdesk/BDSKWebParser.h       2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKWebParser.h       2018-08-18 17:20:48 UTC (rev 22501)
@@ -52,6 +52,7 @@
 @protocol BDSKWebParserDelegate;
 
 @interface BDSKWebParser : NSObject {
+    DOMDocument *domDocument;
     NSXMLDocument *xmlDocument;
     NSURL *URL;
     id<BDSKWebParserDelegate> delegate;
@@ -69,6 +70,7 @@
 
 #pragma mark Concrete web parser
 
+@property (nonatomic, readonly) DOMDocument *domDocument;
 @property (nonatomic, readonly) NSXMLDocument *xmlDocument;
 @property (nonatomic, readonly) NSURL *URL;
 
@@ -77,7 +79,7 @@
 // set at the end of -start, to know there is no more coming, so we may finish
 @property (nonatomic) BOOL finishedStarting;
 
-- (id)initWithDocument:(NSXMLDocument *)aXmlDocument fromURL:(NSURL *)aURL;
+- (id)initWithDocument:(DOMDocument *)aDomDocument xmlDocument:(NSXMLDocument 
*)aXmlDocument fromURL:(NSURL *)aURL;
 
 - (void)startWithDelegate:(id<BDSKWebParserDelegate>)aDelegate;
 - (void)cancel;
@@ -86,7 +88,7 @@
 - (BOOL)canFinishWithItems:(NSArray *)items success:(BOOL *)success;
 
 // this must be implemented by subclasses
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url;
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url;
 
 // main method for subclasses to implement
 - (NSArray *)itemsReturningError:(NSError **)outError;

Modified: trunk/bibdesk/BDSKWebParser.m
===================================================================
--- trunk/bibdesk/BDSKWebParser.m       2018-08-18 06:30:27 UTC (rev 22500)
+++ trunk/bibdesk/BDSKWebParser.m       2018-08-18 17:20:48 UTC (rev 22501)
@@ -64,7 +64,7 @@
 
 @implementation BDSKWebParser
 
-@synthesize xmlDocument, URL, delegate, finishedStarting;
+@synthesize domDocument, xmlDocument, URL, delegate, finishedStarting;
 
 + (NSArray *)parsers {
     static NSArray *webParsers = nil;
@@ -120,7 +120,7 @@
     
     Class parserClass = Nil;
     for (parserClass in [self parsers]) {
-        if ([parserClass canParseDocument:xmlDoc fromURL:url])
+        if ([parserClass canParseDocument:domDocument xmlDocument:xmlDoc 
fromURL:url])
             break;
     }
     
@@ -135,7 +135,7 @@
     
     BDSKASSERT([parserClass isSubclassOfClass:[BDSKWebParser class]]);
 
-    return [[[parserClass alloc] initWithDocument:xmlDoc fromURL:url] 
autorelease];
+    return [[[parserClass alloc] initWithDocument:domDocument 
xmlDocument:xmlDoc fromURL:url] autorelease];
 }
 
 + (NSArray *)parsersForFeature:(BDSKParserFeature)feature {
@@ -155,13 +155,14 @@
     return nil;
 }
 
-- (id)initWithDocument:(NSXMLDocument *)aXmlDocument fromURL:(NSURL *)aURL {
-    if (aXmlDocument == nil || aURL == nil) {
+- (id)initWithDocument:(DOMDocument *)aDomDocument xmlDocument:(NSXMLDocument 
*)aXmlDocument fromURL:(NSURL *)aURL {
+    if (aDomDocument == nil || aXmlDocument == nil || aURL == nil) {
         [self release];
         return nil;
     }
     self = [super init];
     if (self) {
+        domDocument = [aDomDocument retain];
         xmlDocument = [aXmlDocument retain];
         URL = [aURL retain];
     }
@@ -171,6 +172,7 @@
 
 - (void)dealloc {
     delegate = nil;
+    BDSKDESTROY(domDocument);
     BDSKDESTROY(xmlDocument);
     BDSKDESTROY(URL);
     [super dealloc];
@@ -212,7 +214,7 @@
 
 - (NSArray *)itemsReturningError:(NSError **)outError { return nil; }
 
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url { 
return NO; }
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url { return NO; }
 
 + (NSString *)name {
     NSString *name = NSStringFromClass(self);

Modified: trunk/bibdesk/BDSKZentralblattParser.m
===================================================================
--- trunk/bibdesk/BDSKZentralblattParser.m      2018-08-18 06:30:27 UTC (rev 
22500)
+++ trunk/bibdesk/BDSKZentralblattParser.m      2018-08-18 17:20:48 UTC (rev 
22501)
@@ -50,7 +50,7 @@
 
 // Zentralblatt Math is mirrored across several servers. See 
http://www.zentralblatt-math.org/zmath/en/mirrors/ .
 // Accept URLs whose path begins with zmath. As sometimes paths begin with 
multiple slashes, trim those first.
-+ (BOOL)canParseDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
++ (BOOL)canParseDocument:(DOMDocument *)aDomDocument 
xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{
     
     if ([url hasFirstPathComponent:@"zmath"] == NO)
         return NO;
@@ -113,7 +113,7 @@
     // If the referring URL's path begins with '/zmath', assume we were using a 
Zentralblatt mirror server before and continue using that.
     // If not, use the default server instead.
     NSString * serverName = [[referrer host] lowercaseString];
-    if ( [BDSKZentralblattParser canParseDocument:nil fromURL:referrer] ) {
+    if ( [BDSKZentralblattParser canParseDocument:nil xmlDocument:nil 
fromURL:referrer] ) {
         if ( [[referrer path] rangeOfString:@"/zmath/ZMATH"].location != 
NSNotFound ) {
             // some mirrors' paths begin with /ZMATH, add that
             serverName = [serverName stringByAppendingString:@"/ZMATH"];

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit

Reply via email to