Revision: 28113
          http://sourceforge.net/p/bibdesk/svn/28113
Author:   hofman
Date:     2022-12-14 16:15:52 +0000 (Wed, 14 Dec 2022)
Log Message:
-----------
Get items for ACM from doi, either from meta or input elements. Add URL for 
item in ACM site.

Modified Paths:
--------------
    trunk/bibdesk/BDSKACMDLParser.m

Modified: trunk/bibdesk/BDSKACMDLParser.m
===================================================================
--- trunk/bibdesk/BDSKACMDLParser.m     2022-12-14 14:56:00 UTC (rev 28112)
+++ trunk/bibdesk/BDSKACMDLParser.m     2022-12-14 16:15:52 UTC (rev 28113)
@@ -39,44 +39,70 @@
 #import "BDSKACMDLParser.h"
 #import <AGRegex/AGRegex.h>
 #import "DOMNode_BDSKExtensions.h"
+#import "NSURL_BDSKExtensions.h"
+#import "NSString_BDSKExtensions.h"
+#import "BibItem.h"
 
 
 @implementation BDSKACMDLParser
 
-+ (NSString *)citationNodeXPath { return @"./head/meta[@name='citation_abstract_html_url']/@content"; }
-
-+ (NSString *)citationURLStringFromNode:(DOMNode *)node {
-    NSString *nodeString = [[NSURL URLWithString:[node stringValue]] query];
-
-    AGRegex *doiRegex = [AGRegex regexWithPattern:@"^id=([0-9]+)\\.([0-9]+)$"];
-    AGRegexMatch *match = [doiRegex findInString:nodeString];
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
+    if ([url hasDomain:@"dl.acm.org"] == NO)
+        return NO;
     
-    if ([match count] != 3)
+    NSString *doiNodeXPath = nil;
+    if ([url hasFirstPathComponent:@"doi"])
+        doiNodeXPath = @"./head/meta[@name='dc.Identifier' and @scheme='doi']/@content";
+    else if ([url hasFirstPathComponent:@"action"])
+        doiNodeXPath = @"./body//input[@name='doiVal']/@value";
+    else
         return nil;
+    DOMNode *node = [[domDocument documentElement] singleNodeForXPath:doiNodeXPath];
     
-    NSString *parentNumber = [match groupAtIndex:1];
-    NSString *articleNumber = [match groupAtIndex:2];
+    if (node == nil)
+        return NO;
     
-    return [NSString stringWithFormat:@"/downformats.cfm?id=%@&parent_id=%@&expformat=bibtex", articleNumber, parentNumber];
+    return nil != [self citationURLStringFromNode:node];
 }
 
-- (NSString *)bibTeXStringFromDownload:(BDSKCitationDownload *)download {
-    NSString *string = [download string];
+- (NSArray *)itemsReturningError:(NSError **)outError {
+    NSString *doiNodeXPath = nil;
+    if ([[self URL] hasFirstPathComponent:@"doi"])
+        doiNodeXPath = @"./head/meta[@name='dc.Identifier' and @scheme='doi']/@content";
+    else
+        doiNodeXPath = @"./body//input[@name='doiVal']/@value";
+
+    NSArray *doiNodes = [[[self domDocument] documentElement] nodesForXPath:doiNodeXPath];
+    NSMutableSet *dois = [NSMutableSet set];
     
-    // remove characters before the first @ symbol
-    NSRange range = [string rangeOfString:@"@"];
-    if (range.location == NSNotFound)
-        return nil;
+    for (DOMNode *doiNode in doiNodes) {
+        NSString *doi = [doiNode stringValue];
+        
+        if ([dois containsObject:doi]) continue;
+        [dois addObject:doi];
+        
+        NSURL *doiURL = [NSURL URLWithString:[@"https://doi.org/" stringByAppendingString:doi]];
+        NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:doiURL];
+        [request setValue:@"application/x-bibtex, text/bibliography;style=bibtex;q=0.5" forHTTPHeaderField:@"Accept"];
+        
+        NSString *urlString = [@"https://dl.acm.org/doi/" stringByAppendingString:doi];
+        
+        [self addDownloadWithRequest:request contextInfo:urlString];
+    }
     
-    string = [string substringFromIndex:range.location];
+    return nil;
+}
+
+- (NSString *)bibTeXStringFromDownload:(BDSKCitationDownload *)download {
+    return [[download string] stringByRemovingSurroundingWhitespace];
+}
+
+- (NSArray *)itemsFromDownload:(BDSKCitationDownload *)download error:(NSError **)outError {
+    NSArray *items = [super itemsFromDownload:download error:outError];
     
-    // remove spaces in cite key (for some reason, ACM will use author names with spaces in the cite key
-    // but btparse chokes on these)
-    range = [string rangeOfString:@","];
-    if (range.location == NSNotFound)
-        return nil;
+    [[items firstObject] addURLString:[download contextInfo]];
     
-    return [string stringByReplacingOccurrencesOfString:@" " withString:@"" options:0 range:NSMakeRange(0, range.location)];
+    return items;
 }
 
 + (NSString *)name {return @"ACM"; }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.



_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit

Reply via email to