Revision: 15277
          http://bibdesk.svn.sourceforge.net/bibdesk/?rev=15277&view=rev
Author:   hofman
Date:     2009-05-06 14:32:28 +0000 (Wed, 06 May 2009)

Log Message:
-----------
Add a parser for PubMed XML and use it for PubMed data downloaded from the
Entrez server

Modified Paths:
--------------
    trunk/bibdesk/BDSKEntrezGroupServer.m
    trunk/bibdesk/BDSKStringParser.h
    trunk/bibdesk/BDSKStringParser.m
    trunk/bibdesk/BibItem_PubMedLookup.m
    trunk/bibdesk/Bibdesk.xcodeproj/project.pbxproj

Added Paths:
-----------
    trunk/bibdesk/BDSKPubMedXMLParser.h
    trunk/bibdesk/BDSKPubMedXMLParser.m

Modified: trunk/bibdesk/BDSKEntrezGroupServer.m
===================================================================
--- trunk/bibdesk/BDSKEntrezGroupServer.m       2009-05-06 13:39:59 UTC (rev 
15276)
+++ trunk/bibdesk/BDSKEntrezGroupServer.m       2009-05-06 14:32:28 UTC (rev 
15277)
@@ -58,6 +58,7 @@
 #import "BDSKServerInfo.h"
 #import "NSError_BDSKExtensions.h"
 #import "NSFileManager_BDSKExtensions.h"
+#import "BDSKPubMedXMLParser.h"
 
 @implementation BDSKEntrezGroupServer
 
@@ -266,7 +267,7 @@
     NSInteger numResults = MIN([self numberOfAvailableResults] - [self 
numberOfFetchedResults], MAX_RESULTS);
     
     // need to escape queryKey, but the rest should be valid for a URL
-    NSString *efetch = [[[self class] baseURLString] 
stringByAppendingFormat:@"/efetch.fcgi?rettype=medline&retmode=text&retstart=%ld&retmax=%ld&db=%@&query_key=%@&WebEnv=%@&tool=bibdesk",
 (long)[self numberOfFetchedResults], (long)numResults, [[self serverInfo] 
database], [[self queryKey] stringByAddingPercentEscapesIncludingReserved], 
[self webEnv]];
+    NSString *efetch = [[[self class] baseURLString] 
stringByAppendingFormat:@"/efetch.fcgi?rettype=abstract&retmode=xml&retstart=%ld&retmax=%ld&db=%@&query_key=%@&WebEnv=%@&tool=bibdesk",
 (long)[self numberOfFetchedResults], (long)numResults, [[self serverInfo] 
database], [[self queryKey] stringByAddingPercentEscapesIncludingReserved], 
[self webEnv]];
     NSURL *theURL = [NSURL URLWithString:efetch];
     BDSKPOSTCONDITION(theURL);
     
@@ -304,43 +305,20 @@
         [URLDownload release];
         URLDownload = nil;
     }
-
-    // tried using -[NSString stringWithContentsOfFile:usedEncoding:error:] 
but it fails too often
-    NSString *contentString = [NSString stringWithContentsOfFile:filePath 
encoding:0 guessEncoding:YES];
-    NSArray *pubs = nil;
-    if (nil == contentString) {
+    
+    // specifically requested the XML type, so go straight to the correct 
parser
+    NSArray *pubs = [BDSKPubMedXMLParser itemsFromData:[NSData 
dataWithContentsOfMappedFile:filePath] error:&presentableError];
+    
+    if (nil == pubs) {
         failedDownload = YES;
-        presentableError = [NSError 
mutableLocalErrorWithCode:kBDSKStringEncodingError 
localizedDescription:NSLocalizedString(@"Empty search result", @"error when 
pubmed search fails")];
-        [presentableError setValue:NSLocalizedString(@"Either the server 
didn't return any data, or BibDesk was unable to read it as text.", @"Error 
informative text") forKey:NSLocalizedRecoverySuggestionErrorKey];
+        [NSApp presentError:presentableError];
     } else {
-        NSInteger type = [contentString contentStringType];
-        BOOL isPartialData = NO;
-        NSError *error;
-        if (type == BDSKBibTeXStringType) {
-            NSMutableString *frontMatter = [NSMutableString string];
-            pubs = [BDSKBibTeXParser itemsFromData:[contentString 
dataUsingEncoding:NSUTF8StringEncoding] frontMatter:frontMatter 
filePath:filePath document:group encoding:NSUTF8StringEncoding 
isPartialData:&isPartialData error:&error];
-        } else if (type != BDSKUnknownStringType && type != 
BDSKNoKeyBibTeXStringType){
-            pubs = [BDSKStringParser itemsFromString:contentString ofType:type 
error:&error];
-        } else {
-            // this branch exists strictly to ensure that the error is 
initialized before being embedded
-            error = [NSError mutableLocalErrorWithCode:kBDSKUnknownError 
localizedDescription:NSLocalizedString(@"Unknown data type", @"")];
-        }
-        if (pubs == nil || isPartialData) {
-            failedDownload = YES;
-        }
-        presentableError = [NSError 
mutableLocalErrorWithCode:kBDSKUnknownError 
localizedDescription:NSLocalizedString(@"Incorrect result type", @"error when 
pubmed parse fails")];
-        [presentableError setValue:NSLocalizedString(@"The server did not 
return a recognized data format.  This is likely a server problem.", @"error 
when pubmed parse fails") forKey:NSLocalizedRecoverySuggestionErrorKey];
-        [presentableError embedError:error];
+        [group addPublications:pubs];
     }
     
     [[NSFileManager defaultManager] removeFileAtPath:filePath handler:nil];
     [filePath release];
     filePath = nil;
-    
-    if (failedDownload)
-        [NSApp presentError:presentableError];
-
-    [group addPublications:pubs];
 }
 
 - (void)download:(NSURLDownload *)download didFailWithError:(NSError *)error

Added: trunk/bibdesk/BDSKPubMedXMLParser.h
===================================================================
--- trunk/bibdesk/BDSKPubMedXMLParser.h                         (rev 0)
+++ trunk/bibdesk/BDSKPubMedXMLParser.h 2009-05-06 14:32:28 UTC (rev 15277)
@@ -0,0 +1,50 @@
+//
+//  BDSKPubMedXMLParser.h
+//  Bibdesk
+//
+//  Created by Adam Maxwell on 5/2/09.
+/*
+ This software is Copyright (c) 2009
+ Adam Maxwell. All rights reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 
+ - Neither the name of Adam Maxwell nor the names of any
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+ 
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import <Cocoa/Cocoa.h>
+#import "BDSKStringParser.h"
+
+@interface BDSKPubMedXMLParser : BDSKStringParser {
+
+}
+
++ (BOOL)canParseString:(NSString *)string;
++ (NSArray *)itemsFromString:(NSString *)itemString error:(NSError **)outError;
++ (NSArray *)itemsFromData:(NSData *)itemData error:(NSError **)outError;
+
+@end

Added: trunk/bibdesk/BDSKPubMedXMLParser.m
===================================================================
--- trunk/bibdesk/BDSKPubMedXMLParser.m                         (rev 0)
+++ trunk/bibdesk/BDSKPubMedXMLParser.m 2009-05-06 14:32:28 UTC (rev 15277)
@@ -0,0 +1,373 @@
+//
+//  BDSKPubMedXMLParser.m
+//  Bibdesk
+//
+//  Created by Adam Maxwell on 5/2/09.
+/*
+ This software is Copyright (c) 2009
+ Adam Maxwell. All rights reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 
+ - Neither the name of Adam Maxwell nor the names of any
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+ 
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import "BDSKPubMedXMLParser.h"
+#import "BibItem.h"
+
+/*
+ See documentation at
+ 
+ http://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html
+ 
+ */
+
+@interface NSXMLNode (BDSKPubMedExtensions)
+- (NSXMLNode *)firstNodeForXPath:(NSString *)xpath;
+@end
+
+@implementation NSXMLNode (BDSKPubMedExtensions)
+
+- (NSXMLNode *)firstNodeForXPath:(NSString *)xpath;
+{
+    NSError *error;
+    NSArray *nodes = [self nodesForXPath:xpath error:&error];
+#ifdef OMNI_ASSERTIONS_ON
+    if (nil == nodes) NSLog(@"Error for XPath %@: %@", xpath, error);
+#endif
+    return [nodes count] ? [nodes objectAtIndex:0] : nil;
+}
+
+@end
+
+
+@implementation BDSKPubMedXMLParser
+
+static bool _useTitlecase = true;
+#ifdef OMNI_ASSERTIONS_ON
+static bool _addXMLStringToAnnote = true;
+#else
+static bool _addXMLStringToAnnote = false;
+#endif
+
++ (void)initialize
+{
+    // this is messy, but may be useful for debugging
+    if ([[NSUserDefaults standardUserDefaults] 
boolForKey:@"BDSKAddPubMedXMLStringToAnnote"])
+        _addXMLStringToAnnote = true;
+    // try to allow for common titlecasing in PubMed (which gives us sentence 
case journal titles)
+    if ([[NSUserDefaults standardUserDefaults] 
boolForKey:@"BDSKDisablePubMedXMLTitleCasing"])
+        _useTitlecase = false;
+}
+
++ (BOOL)canParseString:(NSString *)string;
+{
+    return [string rangeOfString:@"<!DOCTYPE PubmedArticleSet" 
options:NSCaseInsensitiveSearch].length > 0;
+}
+
+// convenience to avoid creating a local variable and checking it each time
+static inline void addStringToDictionaryIfNotNil(NSString *value, NSString 
*key, NSMutableDictionary *dict)
+{
+    if (value) [dict setObject:[value stringByBackslashEscapingTeXSpecials] 
forKey:key];
+}
+
+// convenience to add the string value of a node; only adds if non-nil
+static inline void addStringValueOfNodeForField(NSXMLNode *child, NSString 
*field, NSMutableDictionary *pubFields)
+{
+    addStringToDictionaryIfNotNil([child stringValue], field, pubFields);
+}
+
++ (void)_addPubDateNode:(NSXMLNode *)dateNode 
toDictionary:(NSMutableDictionary *)pubFields
+{
+    NSEnumerator *compEnum = [[dateNode children] objectEnumerator];
+    NSXMLNode *comp;
+    
+    while (comp = [compEnum nextObject]) {
+        
+        if ([[comp name] isEqualToString:@"Year"]) {
+            addStringValueOfNodeForField(comp, BDSKYearString, pubFields);
+        }
+        else if ([[comp name] isEqualToString:@"Month"]) {
+            addStringValueOfNodeForField(comp, BDSKMonthString, pubFields);
+        }
+        else if ([[comp name] isEqualToString:@"MedlineDate"]) {
+            // this is a fallback mechanism
+            addStringValueOfNodeForField(comp, BDSKDateString, pubFields);
+            
+            // first 4 digits should be a year
+            NSScanner *scanner = [[NSScanner alloc] initWithString:[comp 
stringValue]];
+            NSString *year;
+            if ([scanner scanCharactersFromSet:[NSCharacterSet 
decimalDigitCharacterSet] intoString:&year] && [year length] == 4)
+                addStringToDictionaryIfNotNil(year, BDSKYearString, pubFields);
+            [scanner release];
+        }
+    }
+}
+
++ (void)_addJournalNode:(NSXMLNode *)journalNode 
toDictionary:(NSMutableDictionary *)pubFields
+{
+    /*
+     <Journal>
+        <ISSN IssnType="Print">1821-6404</ISSN>
+        <JournalIssue CitedMedium="Print">
+            <Volume>10</Volume>
+            <Issue>4</Issue>
+            <PubDate>
+                <Year>2008</Year>
+                <Month>Oct</Month>
+            </PubDate>
+        </JournalIssue>
+        <Title>Tanzania journal of health research</Title>
+     </Journal>
+     */
+    
+    NSEnumerator *nodeEnum = [[journalNode children] objectEnumerator];
+    NSXMLNode *node;
+    
+    while (node = [nodeEnum nextObject]) {
+        
+        NSString *nodeName = [node name];
+        
+        if ([nodeName isEqualToString:@"Title"]) {
+            addStringToDictionaryIfNotNil(_useTitlecase ? [[node stringValue] 
titlecaseString] : [node stringValue], BDSKJournalString, pubFields);
+        }
+        else if ([nodeName isEqualToString:@"JournalIssue"]) {
+            
+            NSEnumerator *childEnum = [[node children] objectEnumerator];
+            NSXMLNode *child;
+            
+            while (child = [childEnum nextObject]) {
+                NSString *childName = [child name];
+                if ([childName isEqualToString:@"Volume"]) 
addStringValueOfNodeForField(child, BDSKVolumeString, pubFields);
+                else if ([childName isEqualToString:@"Issue"]) 
addStringValueOfNodeForField(child, BDSKNumberString, pubFields);
+                else if ([childName isEqualToString:@"PubDate"]) [self 
_addPubDateNode:child toDictionary:pubFields];
+            }
+        }
+    }
+}
+
++ (void)_addAuthorListNode:(NSXMLNode *)authorListNode 
toDictionary:(NSMutableDictionary *)pubFields
+{    
+    /*
+        <AuthorList CompleteYN="Y">
+            <Author ValidYN="Y">
+                <LastName>Ezekiel</LastName>
+                <ForeName>M J</ForeName>
+                <Initials>MJ</Initials>
+                <Suffix>Jr</Suffix>
+            </Author>
+        </AuthorList>
+     
+     NB: ForeName is the only key documented, but testing reveals FirstName 
may be used instead.
+     nlmcommon_090101.dtd says MiddleName may appear with FirstName as well.
+     
+     CollectiveName is for a corporate name, although it may be interspersed 
with other authors.  
+     Enclose these in braces as a last name only.  See PMID 18084292 for an 
example.
+     
+     */
+    
+    NSMutableArray *authorNames = [NSMutableArray new];
+    NSEnumerator *authorEnum = [[authorListNode children] objectEnumerator];
+    NSXMLNode *authorNode;
+    
+    while (authorNode = [authorEnum nextObject]) {
+        
+        // this should always be true...
+        if ([[authorNode name] isEqualToString:@"Author"]) {
+            
+            NSString *lastName = nil;
+            NSString *firstName = nil;
+            NSString *middleName = nil;
+            NSString *suffix = nil;
+            NSEnumerator *nameEnum = [[authorNode children] objectEnumerator];
+            NSXMLNode *name;
+            
+            while (name = [nameEnum nextObject]) {
+                
+                NSString *nodeName = [name name];
+                
+                if ([nodeName isEqualToString:@"LastName"]) lastName = [name 
stringValue];
+                else if ([nodeName isEqualToString:@"ForeName"] || [nodeName 
isEqualToString:@"FirstName"]) firstName = [name stringValue];
+                else if ([nodeName isEqualToString:@"Suffix"]) suffix = [name 
stringValue];
+                else if ([nodeName isEqualToString:@"MiddleName"]) middleName 
= [name stringValue];
+                else if ([nodeName isEqualToString:@"CollectiveName"]) 
lastName = [NSString stringWithFormat:@"{%@}", [name stringValue]];
+            }
+            
+            // normalized form for btparse: von Last, Jr, First Middle
+            NSMutableString *fullName = [NSMutableString new];
+            if (lastName) {
+                [fullName appendString:lastName];
+            }
+            if (suffix) {
+                if ([fullName isEqualToString:@""] == NO)
+                    [fullName appendString:@", "];
+                [fullName appendString:suffix];
+            }
+            if (firstName) {
+                if ([fullName isEqualToString:@""] == NO)
+                    [fullName appendString:@", "];
+                [fullName appendString:firstName];
+            }
+            if (middleName) {
+                // no comma for a middle name
+                if ([fullName isEqualToString:@""] == NO)
+                    [fullName appendString:@" "];
+                // typically just an initial, but the .bst will handle any dot 
for abbreviations
+                [fullName appendString:middleName];
+            }
+            [authorNames addObject:fullName];
+            [fullName release];
+        }
+        else {
+            NSLog(@"Unknown node name %@ in %@", [authorNode name], 
authorListNode);
+        }
+    }
+    
+    if ([authorNames count])
+        addStringToDictionaryIfNotNil([authorNames componentsJoinedByString:@" 
and "], BDSKAuthorString, pubFields);
+    [authorNames release];
+}
+
++ (void)_addMeshNode:(NSXMLNode *)listNode toDictionary:(NSMutableDictionary 
*)pubFields
+{
+    if ([[listNode children] count] == 0)
+        return;
+
+    NSMutableString *meshString = [NSMutableString new];
+    NSString *keywordSeparator = [[NSUserDefaults standardUserDefaults] 
objectForKey:BDSKDefaultGroupFieldSeparatorKey];
+    NSEnumerator *meshEnum = [[listNode children] objectEnumerator];
+    NSXMLNode *meshNode;
+
+    while (meshNode = [meshEnum nextObject]) {
+        if ([[meshNode name] isEqualToString:@"MeshHeading"]) {
+            NSEnumerator *headingEnum = [[meshNode children] objectEnumerator];
+            NSXMLNode *headingNode;
+            
+            while (headingNode = [headingEnum nextObject]) {
+                
+                // add descriptor name and ignore qualifier name
+                if ([[headingNode name] isEqualToString:@"DescriptorName"]) {
+                    if ([meshString length])
+                        [meshString appendString:keywordSeparator];
+                    [meshString appendString:[headingNode stringValue]];
+                }
+            }
+        }
+    }
+    [pubFields setObject:meshString forKey:@"Mesh"];
+    [meshString release];
+}
+
++ (void)_addKeywordNode:(NSXMLNode *)listNode 
toDictionary:(NSMutableDictionary *)pubFields
+{
+    if ([[listNode children] count] == 0)
+        return;
+    
+    NSMutableString *keywordString = [NSMutableString new];
+    NSString *keywordSeparator = [[NSUserDefaults standardUserDefaults] 
objectForKey:BDSKDefaultGroupFieldSeparatorKey];
+    NSEnumerator *keywordEnum = [[listNode children] objectEnumerator];
+    NSXMLNode *keywordNode;
+    
+    while (keywordNode = [keywordEnum nextObject]) {
+        
+        if ([[keywordNode name] isEqualToString:@"Keyword"]) {
+            if ([keywordString length])
+                [keywordString appendString:keywordSeparator];
+            [keywordString appendString:[keywordNode stringValue]];
+        }
+    }
+    [pubFields setObject:keywordString forKey:BDSKKeywordsString];
+    [keywordString release];
+}
+
++ (NSArray *)_itemsFromDocument:(NSXMLDocument *)doc error:(NSError 
**)outError;
+{
+    NSArray *articles = [doc nodesForXPath:@"//PubmedArticle" error:outError];
+    NSMutableArray *pubs = [NSMutableArray array];
+    NSEnumerator *articleEnum = [articles objectEnumerator];
+    NSXMLNode *article;
+    
+    while (article = [articleEnum nextObject]) {
+        
+        NSXMLNode *citation = [article 
firstNodeForXPath:@"./MedlineCitation"];        
+        NSMutableDictionary *pubFields = [NSMutableDictionary new];
+        
+        [self _addJournalNode:[citation 
firstNodeForXPath:@"./Article/Journal"] toDictionary:pubFields];
+        [self _addAuthorListNode:[citation 
firstNodeForXPath:@"./Article/AuthorList"] toDictionary:pubFields];
+        
+        // ex. PMID 16187791
+        [self _addMeshNode:[citation firstNodeForXPath:@"./MeshHeadingList"] 
toDictionary:pubFields];
+        [self _addKeywordNode:[citation firstNodeForXPath:@"./KeywordList"] 
toDictionary:pubFields];
+        
+        NSString *title = [[citation 
firstNodeForXPath:@"./Article/ArticleTitle"] stringValue];
+        addStringToDictionaryIfNotNil([title stringByRemovingSuffix:@"."], 
BDSKTitleString, pubFields);        
+        addStringValueOfNodeForField([citation 
firstNodeForXPath:@"./Article/Abstract/AbstractText"], BDSKAbstractString, 
pubFields);
+        addStringValueOfNodeForField([citation 
firstNodeForXPath:@"./Article/Pagination/MedlinePgn"], BDSKPagesString, 
pubFields);
+        addStringValueOfNodeForField([citation firstNodeForXPath:@"./PMID"], 
@"Pmid", pubFields);
+        
+        // grab the DOI if available
+        NSArray *articleIDs = [article 
nodesForXPath:@"./PubmedData/ArticleIdList/ArticleId" error:NULL];
+        NSEnumerator *articleIDEnum = [articleIDs objectEnumerator];
+        NSXMLElement *articleID;
+        
+        while (articleID = [articleIDEnum nextObject]) {
+            if ([articleID kind] == NSXMLElementKind && [[[articleID 
attributeForName:@"IdType"] stringValue] isEqualToString:@"doi"])
+                addStringValueOfNodeForField(articleID, BDSKDoiString, 
pubFields);
+        }
+        
+        
+        // for debugging
+        if (_addXMLStringToAnnote) addStringToDictionaryIfNotNil([article 
XMLStringWithOptions:NSXMLNodePrettyPrint], BDSKAnnoteString, pubFields);
+        
+        BibItem *pub = [[BibItem allocWithZone:[self zone]] 
initWithType:BDSKArticleString
+                                                                
fileType:BDSKBibtexString
+                                                                 citeKey:nil
+                                                               
pubFields:pubFields
+                                                                   isNew:YES];
+        [pubs addObject:pub];
+        [pub release];
+        [pubFields release];
+    }
+    
+    return pubs;    
+}
+
++ (NSArray *)itemsFromString:(NSString *)itemString error:(NSError **)outError;
+{
+    NSXMLDocument *doc = [[NSXMLDocument allocWithZone:[self zone]] 
initWithXMLString:itemString options:NSXMLNodeOptionsNone error:outError];
+    doc = [doc autorelease];
+    return doc ? [self _itemsFromDocument:doc error:outError] : nil;
+}
+
++ (NSArray *)itemsFromData:(NSData *)itemData error:(NSError **)outError;
+{
+    NSXMLDocument *doc = [[NSXMLDocument allocWithZone:[self zone]] 
initWithData:itemData options:NSXMLNodeOptionsNone error:outError];
+    doc = [doc autorelease];
+    return doc ? [self _itemsFromDocument:doc error:outError] : nil;
+}
+
+@end

Modified: trunk/bibdesk/BDSKStringParser.h
===================================================================
--- trunk/bibdesk/BDSKStringParser.h    2009-05-06 13:39:59 UTC (rev 15276)
+++ trunk/bibdesk/BDSKStringParser.h    2009-05-06 14:32:28 UTC (rev 15277)
@@ -51,7 +51,8 @@
        BDSKDublinCoreStringType,
     BDSKReferStringType,
     BDSKMODSStringType,
-    BDSKSciFinderStringType
+    BDSKSciFinderStringType,
+    BDSKPubMedXMLStringType
 };
 
 // these methods are valid for the abstract class, and should not be used or 
defined for a concrete subclass

Modified: trunk/bibdesk/BDSKStringParser.m
===================================================================
--- trunk/bibdesk/BDSKStringParser.m    2009-05-06 13:39:59 UTC (rev 15276)
+++ trunk/bibdesk/BDSKStringParser.m    2009-05-06 14:32:28 UTC (rev 15277)
@@ -49,6 +49,7 @@
 #import "BDSKReferParser.h"
 #import "BDSKMODSParser.h"
 #import "BDSKSciFinderParser.h"
+#import "BDSKPubMedXMLParser.h"
 #import "BDSKRuntime.h"
 
 @implementation BDSKStringParser
@@ -87,6 +88,9 @@
         case BDSKSciFinderStringType:
             parserClass = [BDSKSciFinderParser class];
             break;
+        case BDSKPubMedXMLStringType:
+            parserClass = [BDSKPubMedXMLParser class];
+            break;
         default:
             parserClass = Nil;
     }    
@@ -141,28 +145,30 @@
 @implementation NSString (BDSKStringParserExtensions)
 
 - (NSInteger)contentStringType{
-       if([BDSKBibTeXParser canParseString:self])
-               return BDSKBibTeXStringType;
-       if([BDSKReferenceMinerParser canParseString:self])
-               return BDSKReferenceMinerStringType;
-       if([BDSKPubMedParser canParseString:self])
-               return BDSKPubMedStringType;
-       if([BDSKRISParser canParseString:self])
-               return BDSKRISStringType;
-       if([BDSKMARCParser canParseString:self])
-               return BDSKMARCStringType;
-       if([BDSKJSTORParser canParseString:self])
-               return BDSKJSTORStringType;
-       if([BDSKWebOfScienceParser canParseString:self])
-               return BDSKWOSStringType;
-       if([BDSKBibTeXParser canParseStringAfterFixingKeys:self])
-               return BDSKNoKeyBibTeXStringType;
+    if([BDSKBibTeXParser canParseString:self])
+        return BDSKBibTeXStringType;
+    if([BDSKReferenceMinerParser canParseString:self])
+        return BDSKReferenceMinerStringType;
+    if([BDSKPubMedParser canParseString:self])
+        return BDSKPubMedStringType;
+    if([BDSKRISParser canParseString:self])
+        return BDSKRISStringType;
+    if([BDSKMARCParser canParseString:self])
+        return BDSKMARCStringType;
+    if([BDSKJSTORParser canParseString:self])
+        return BDSKJSTORStringType;
+    if([BDSKWebOfScienceParser canParseString:self])
+        return BDSKWOSStringType;
+    if([BDSKBibTeXParser canParseStringAfterFixingKeys:self])
+        return BDSKNoKeyBibTeXStringType;
     if([BDSKReferParser canParseString:self])
         return BDSKReferStringType;
     if([BDSKMODSParser canParseString:self])
         return BDSKMODSStringType;
     if([BDSKSciFinderParser canParseString:self])
         return BDSKSciFinderStringType;
+    if([BDSKPubMedXMLParser canParseString:self])
+        return BDSKPubMedXMLStringType;
        // don't check DC, as the check is too unreliable
     return BDSKUnknownStringType;
 }

Modified: trunk/bibdesk/BibItem_PubMedLookup.m
===================================================================
--- trunk/bibdesk/BibItem_PubMedLookup.m        2009-05-06 13:39:59 UTC (rev 
15276)
+++ trunk/bibdesk/BibItem_PubMedLookup.m        2009-05-06 14:32:28 UTC (rev 
15277)
@@ -39,13 +39,14 @@
 #import "BibItem_PubMedLookup.h"
 #import <WebKit/WebKit.h>
 #import "BDSKStringParser.h"
+#import "BDSKPubMedXMLParser.h"
 #import <AGRegex/AGRegex.h>
 #import "NSURL_BDSKExtensions.h"
 #import "NSString_BDSKExtensions.h"
 #import "PDFMetadata.h"
 
 @interface BDSKPubMedLookupHelper : NSObject
-+ (NSString *)referenceForPubMedSearchTerm:(NSString *)searchTerm;
++ (NSData *)xmlReferenceDataForPMID:(NSString *)searchTerm;
 @end
 
 @implementation NSString (PubMedLookup)
@@ -345,8 +346,8 @@
 
 + (id)itemWithPubMedSearchTerm:(NSString *)searchTerm;
 {
-    NSString *string = [BDSKPubMedLookupHelper 
referenceForPubMedSearchTerm:searchTerm];
-    return string ? [[BDSKStringParser itemsFromString:string 
ofType:BDSKUnknownStringType error:NULL] lastObject] : nil;
+    NSData *data = [BDSKPubMedLookupHelper xmlReferenceDataForPMID:searchTerm];
+    return [data length] ? [[BDSKPubMedXMLParser itemsFromData:data 
error:NULL] lastObject] : nil;
 }
 
 @end
@@ -381,12 +382,12 @@
     return canConnect;
 }
 
-+ (NSString *)referenceForPubMedSearchTerm:(NSString *) searchTerm;
++ (NSData *)xmlReferenceDataForPMID:(NSString *)searchTerm;
 {
     NSParameterAssert(searchTerm != nil);
     
-    NSString *toReturn = nil;
-    
+    NSData *toReturn = nil;
+        
     if ([self canConnect] == NO)
         return toReturn;
         
@@ -419,29 +420,12 @@
         if ([count intValue] == 1) {  
             
             // get the first result (zero-based indexing)
-            NSString *efetch = [[[self class] baseURLString] 
stringByAppendingFormat:@"/efetch.fcgi?rettype=medline&retmode=text&retstart=0&retmax=1&db=pubmed&query_key=%@&WebEnv=%@&tool=bibdesk",
 queryKey, webEnv];
+            NSString *efetch = [[[self class] baseURLString] 
stringByAppendingFormat:@"/efetch.fcgi?rettype=abstract&retmode=xml&retstart=0&retmax=1&db=pubmed&query_key=%@&WebEnv=%@&tool=bibdesk",
 queryKey, webEnv];
             theURL = [NSURL URLWithString:efetch];
             BDSKPOSTCONDITION(theURL);
             
             request = [NSURLRequest requestWithURL:theURL 
cachePolicy:NSURLRequestUseProtocolCachePolicy timeoutInterval:1.0];
-            NSData *efetchResult = [NSURLConnection 
sendSynchronousRequest:request returningResponse:&response error:&error];
-            
-            if (efetchResult) {
-                
-                // try to get encoding from the http headers; returned nil 
when I tried
-                NSString *encodingName = [response textEncodingName];
-                NSStringEncoding encoding = encodingName ? 
CFStringConvertEncodingToNSStringEncoding(CFStringConvertIANACharSetNameToEncoding((CFStringRef)encodingName))
 : kCFStringEncodingInvalidId;
-                
-                if (encoding != kCFStringEncodingInvalidId)
-                    toReturn = [[NSString alloc] initWithData:efetchResult 
encoding:encoding];
-                else
-                    toReturn = [[NSString alloc] initWithData:efetchResult 
encoding:NSUTF8StringEncoding];
-                
-                if (nil == toReturn)
-                    toReturn = [[NSString alloc] initWithData:efetchResult 
encoding:NSISOLatin1StringEncoding];
-                
-                [toReturn autorelease];
-            }
+            toReturn = [NSURLConnection sendSynchronousRequest:request 
returningResponse:&response error:&error];
         }
         [document release];
     }

Modified: trunk/bibdesk/Bibdesk.xcodeproj/project.pbxproj
===================================================================
--- trunk/bibdesk/Bibdesk.xcodeproj/project.pbxproj     2009-05-06 13:39:59 UTC 
(rev 15276)
+++ trunk/bibdesk/Bibdesk.xcodeproj/project.pbxproj     2009-05-06 14:32:28 UTC 
(rev 15277)
@@ -188,6 +188,7 @@
                CE6DACC50A503ECF00123185 /* BDSKToolbarItem.m in Sources */ = 
{isa = PBXBuildFile; fileRef = CE6DACC30A503ECF00123185 /* BDSKToolbarItem.m 
*/; };
                CE6DCA230D6A0237003A072F /* BDSKCondition+Scripting.m in 
Sources */ = {isa = PBXBuildFile; fileRef = CE6DCA210D6A0237003A072F /* 
BDSKCondition+Scripting.m */; };
                CE6FB32309DFFCB5005E3E14 /* BDSKSharingBrowser.m in Sources */ 
= {isa = PBXBuildFile; fileRef = CE6FB32109DFFCB5005E3E14 /* 
BDSKSharingBrowser.m */; };
+               CE73BAD10FB1CE9600A43716 /* BDSKPubMedXMLParser.m in Sources */ 
= {isa = PBXBuildFile; fileRef = CE73BACF0FB1CE9600A43716 /* 
BDSKPubMedXMLParser.m */; };
                CE7596000ADDB0E1009C1329 /* BDSKContainerView.m in Sources */ = 
{isa = PBXBuildFile; fileRef = CE7595FE0ADDB0E0009C1329 /* BDSKContainerView.m 
*/; };
                CE7611530EA49B6E00301E45 /* BDSKPrintableView.m in Sources */ = 
{isa = PBXBuildFile; fileRef = CE7611510EA49B6E00301E45 /* BDSKPrintableView.m 
*/; };
                CE76A5430B430EA500E61066 /* BDSKSearchGroupSheet.nib in 
Resources */ = {isa = PBXBuildFile; fileRef = CE76A53D0B430EA500E61066 /* 
BDSKSearchGroupSheet.nib */; };
@@ -1148,6 +1149,8 @@
                CE6DCA210D6A0237003A072F /* BDSKCondition+Scripting.m */ = {isa 
= PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; 
path = "BDSKCondition+Scripting.m"; sourceTree = "<group>"; };
                CE6FB32009DFFCB5005E3E14 /* BDSKSharingBrowser.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = 
BDSKSharingBrowser.h; sourceTree = "<group>"; };
                CE6FB32109DFFCB5005E3E14 /* BDSKSharingBrowser.m */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path 
= BDSKSharingBrowser.m; sourceTree = "<group>"; };
+               CE73BACE0FB1CE9600A43716 /* BDSKPubMedXMLParser.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = 
BDSKPubMedXMLParser.h; sourceTree = "<group>"; };
+               CE73BACF0FB1CE9600A43716 /* BDSKPubMedXMLParser.m */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path 
= BDSKPubMedXMLParser.m; sourceTree = "<group>"; };
                CE7595FD0ADDB0E0009C1329 /* BDSKContainerView.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = 
BDSKContainerView.h; sourceTree = "<group>"; };
                CE7595FE0ADDB0E0009C1329 /* BDSKContainerView.m */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path 
= BDSKContainerView.m; sourceTree = "<group>"; };
                CE7611500EA49B6E00301E45 /* BDSKPrintableView.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = 
BDSKPrintableView.h; sourceTree = "<group>"; };
@@ -2331,6 +2334,7 @@
                                6C5DE3E60F8FC33B00E02D5F /* 
BDSKMathSiteParser.m */,
                                F9D0E5350BF92768001C6C22 /* BDSKMODSParser.m */,
                                F9022CA70758038000C3F701 /* BDSKPubMedParser.m 
*/,
+                               CE73BACF0FB1CE9600A43716 /* 
BDSKPubMedXMLParser.m */,
                                F940D1080B5568E400B5917A /* BDSKReferParser.m 
*/,
                                CE601A4D0AF4C488000B5680 /* 
BDSKReferenceMinerParser.m */,
                                CE600E2E0AF405D6000B5680 /* BDSKRISParser.m */,
@@ -2709,6 +2713,7 @@
                                CE7611500EA49B6E00301E45 /* BDSKPrintableView.h 
*/,
                                CE565BBD0AEF848B002F0A24 /* 
BDSKPublicationsArray.h */,
                                F9022CA60758038000C3F701 /* BDSKPubMedParser.h 
*/,
+                               CE73BACE0FB1CE9600A43716 /* 
BDSKPubMedXMLParser.h */,
                                CE392F5B08D04083001CEAC8 /* BDSKRatingButton.h 
*/,
                                CE392ED208D034E4001CEAC8 /* 
BDSKRatingButtonCell.h */,
                                CE95A5790A88883300334DFA /* 
BDSKReadMeController.h */,
@@ -3937,6 +3942,7 @@
                                6CD26A240F928EEE0089FDFD /* 
BDSKBibDeskProtocol.m in Sources */,
                                CEDA7E0D0F96497B00F72C0A /* 
NSAlert_BDSKExtensions.m in Sources */,
                                6CAEE4CD0F98EC63009EA5FE /* BDSKCOinSParser.m 
in Sources */,
+                               CE73BAD10FB1CE9600A43716 /* 
BDSKPubMedXMLParser.m in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.

------------------------------------------------------------------------------
The NEW KODAK i700 Series Scanners deliver under ANY circumstances! Your
production scanning environment may not be a perfect world - but thanks to
Kodak, there's a perfect scanner to get the job done! With the NEW KODAK i700
Series Scanner you'll get full speed at 300 dpi even with all image 
processing features enabled. http://p.sf.net/sfu/kodak-com
_______________________________________________
Bibdesk-commit mailing list
Bibdesk-commit@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit

Reply via email to