Revision: 22323
          http://sourceforge.net/p/bibdesk/svn/22323
Author:   hofman
Date:     2018-06-18 13:54:01 +0000 (Mon, 18 Jun 2018)
Log Message:
-----------
Extend arXiv web parser to support search results

Modified Paths:
--------------
    trunk/bibdesk/BDSKArxivParser.m

Modified: trunk/bibdesk/BDSKArxivParser.m
===================================================================
--- trunk/bibdesk/BDSKArxivParser.m     2018-06-18 06:30:26 UTC (rev 22322)
+++ trunk/bibdesk/BDSKArxivParser.m     2018-06-18 13:54:01 UTC (rev 22323)
@@ -53,12 +53,13 @@
         return NO;
     
     BOOL isAbstract = [url hasFirstPathComponent:@"abs"];
-    NSString *containsArxivLinkNode = isAbstract ? 
@"./body/div[@id='content']/div[@id='abs']/div[@class='leftcolumn']/div[@class='metatable']/table//td[@class='tablecell
 arxivid']" : 
@"./body/div[@id='content']/div/dl/dt/span[@class='list-identifier']";
+    BOOL isSearch = [url hasFirstPathComponent:@"search"];
+    NSString *containsArxivLinkNode = isAbstract ? 
@"./body/div[@id='content']/div[@id='abs']/div[@class='leftcolumn']/div[@class='metatable']/table//td[@class='tablecell
 arxivid']" : isSearch ? 
@"./body//ol[@class='breathe-horizontal']/li[@class='arxiv-result']" : 
@"./body/div[@id='content']/div/dl/dt/span[@class='list-identifier']";
     
     NSError *error = nil;    
-
+    
     NSInteger nodecount = [[[xmlDocument rootElement] 
nodesForXPath:containsArxivLinkNode error:&error] count];
-
+    
     return nodecount > 0;
 }
 
@@ -72,7 +73,8 @@
     NSURL *url = [self URL];
     
     BOOL isAbstract = [url hasFirstPathComponent:@"abs"];
-    
+    BOOL isSearch = [url hasFirstPathComponent:@"search"];
+
     NSString *arxivSearchResultNodePath = 
@"./body/div[@id='content']/div/dl/dt";
     
     NSString *arxivLinkNodePath = @"./span[@class='list-identifier']";
@@ -95,6 +97,17 @@
         authorsNodePath = @"./div[@class='authors']/a";
         journalNodePath = 
@"./div[@class='metatable']/table//td[@class='tablecell jref']";
         abstractNodePath = @"./blockquote[contains(concat(' 
',normalize-space(@class),' '),' abstract ')]";
+    } else if (isSearch) {
+        arxivSearchResultNodePath = 
@"./body//ol[@class='breathe-horizontal']/li[@class='arxiv-result']";
+        
+        arxivLinkNodePath = @"./div[contains(concat(' 
',normalize-space(@class),' '),' level ')]/p[contains(concat(' 
',normalize-space(@class),' '),' list-title ')]";
+        arxivIDNodePath = @"./a[contains(text(),'arXiv:')]";
+        
+        pdfURLNodePath = @"./span/a[contains(text(),'pdf')]";
+        titleNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' title ')]";
+        authorsNodePath = @"./p[@class='authors']/a";
+        journalNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' comments ')]/span[text()='Journal ref:']/..";
+        abstractNodePath = @"./p[contains(concat(' ',normalize-space(@class),' 
'),' abstract ')]/span[contains(concat(' ',normalize-space(@class),' '),' 
abstract-full ')]/text()";
     }
     
     AGRegex *eprintRegex1 = [AGRegex 
regexWithPattern:@"([0-9]{2})([0-9]{2})\\.([0-9]{4})"
@@ -143,7 +156,7 @@
         }
         
         NSXMLNode *arxivLinkNode = [arxivLinkNodes objectAtIndex:0];
-        NSXMLNode *arxivMetaNode = isAbstract ? arxivSearchResult : 
[[[arxivSearchResult nextSibling] children] firstObject];
+        NSXMLNode *arxivMetaNode = isAbstract || isSearch ? arxivSearchResult 
: [[[arxivSearchResult nextSibling] children] firstObject];
         NSArray *nodes;
         
         NSMutableDictionary *pubFields = [NSMutableDictionary dictionary];
@@ -189,7 +202,7 @@
         // search for title
         nodes = [arxivMetaNode nodesForXPath:titleNodePath error:&error];
         if (nil != nodes && 1 == [nodes count]) {
-            if ((string = [[[nodes objectAtIndex:0] childAtIndex:1] 
stringValue])) {
+            if ((string = [[[nodes objectAtIndex:0] childAtIndex:isSearch ? 0 
: 1] stringValue])) {
                 string = [string 
stringByRemovingSurroundingWhitespaceAndNewlines];
                 [pubFields setValue:string forKey:BDSKTitleString];
             }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
Bibdesk-commit@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit

Reply via email to