Revision: 15277
http://bibdesk.svn.sourceforge.net/bibdesk/?rev=15277&view=rev
Author: hofman
Date: 2009-05-06 14:32:28 +0000 (Wed, 06 May 2009)
Log Message:
-----------
Add a parser for pubmed XML and use it for pubmed downloaded from the entrez
server
Modified Paths:
--------------
trunk/bibdesk/BDSKEntrezGroupServer.m
trunk/bibdesk/BDSKStringParser.h
trunk/bibdesk/BDSKStringParser.m
trunk/bibdesk/BibItem_PubMedLookup.m
trunk/bibdesk/Bibdesk.xcodeproj/project.pbxproj
Added Paths:
-----------
trunk/bibdesk/BDSKPubMedXMLParser.h
trunk/bibdesk/BDSKPubMedXMLParser.m
Modified: trunk/bibdesk/BDSKEntrezGroupServer.m
===================================================================
--- trunk/bibdesk/BDSKEntrezGroupServer.m 2009-05-06 13:39:59 UTC (rev
15276)
+++ trunk/bibdesk/BDSKEntrezGroupServer.m 2009-05-06 14:32:28 UTC (rev
15277)
@@ -58,6 +58,7 @@
#import "BDSKServerInfo.h"
#import "NSError_BDSKExtensions.h"
#import "NSFileManager_BDSKExtensions.h"
+#import "BDSKPubMedXMLParser.h"
@implementation BDSKEntrezGroupServer
@@ -266,7 +267,7 @@
NSInteger numResults = MIN([self numberOfAvailableResults] - [self
numberOfFetchedResults], MAX_RESULTS);
// need to escape queryKey, but the rest should be valid for a URL
- NSString *efetch = [[[self class] baseURLString]
stringByAppendingFormat:@"/efetch.fcgi?rettype=medline&retmode=text&retstart=%ld&retmax=%ld&db=%@&query_key=%@&WebEnv=%@&tool=bibdesk",
(long)[self numberOfFetchedResults], (long)numResults, [[self serverInfo]
database], [[self queryKey] stringByAddingPercentEscapesIncludingReserved],
[self webEnv]];
+ NSString *efetch = [[[self class] baseURLString]
stringByAppendingFormat:@"/efetch.fcgi?rettype=abstract&retmode=xml&retstart=%ld&retmax=%ld&db=%@&query_key=%@&WebEnv=%@&tool=bibdesk",
(long)[self numberOfFetchedResults], (long)numResults, [[self serverInfo]
database], [[self queryKey] stringByAddingPercentEscapesIncludingReserved],
[self webEnv]];
NSURL *theURL = [NSURL URLWithString:efetch];
BDSKPOSTCONDITION(theURL);
@@ -304,43 +305,20 @@
[URLDownload release];
URLDownload = nil;
}
-
- // tried using -[NSString stringWithContentsOfFile:usedEncoding:error:]
but it fails too often
- NSString *contentString = [NSString stringWithContentsOfFile:filePath
encoding:0 guessEncoding:YES];
- NSArray *pubs = nil;
- if (nil == contentString) {
+
+ // specifically requested the XML type, so go straight to the correct
parser
+ NSArray *pubs = [BDSKPubMedXMLParser itemsFromData:[NSData
dataWithContentsOfMappedFile:filePath] error:&presentableError];
+
+ if (nil == pubs) {
failedDownload = YES;
- presentableError = [NSError
mutableLocalErrorWithCode:kBDSKStringEncodingError
localizedDescription:NSLocalizedString(@"Empty search result", @"error when
pubmed search fails")];
- [presentableError setValue:NSLocalizedString(@"Either the server
didn't return any data, or BibDesk was unable to read it as text.", @"Error
informative text") forKey:NSLocalizedRecoverySuggestionErrorKey];
+ [NSApp presentError:presentableError];
} else {
- NSInteger type = [contentString contentStringType];
- BOOL isPartialData = NO;
- NSError *error;
- if (type == BDSKBibTeXStringType) {
- NSMutableString *frontMatter = [NSMutableString string];
- pubs = [BDSKBibTeXParser itemsFromData:[contentString
dataUsingEncoding:NSUTF8StringEncoding] frontMatter:frontMatter
filePath:filePath document:group encoding:NSUTF8StringEncoding
isPartialData:&isPartialData error:&error];
- } else if (type != BDSKUnknownStringType && type !=
BDSKNoKeyBibTeXStringType){
- pubs = [BDSKStringParser itemsFromString:contentString ofType:type
error:&error];
- } else {
- // this branch exists strictly to ensure that the error is
initialized before being embedded
- error = [NSError mutableLocalErrorWithCode:kBDSKUnknownError
localizedDescription:NSLocalizedString(@"Unknown data type", @"")];
- }
- if (pubs == nil || isPartialData) {
- failedDownload = YES;
- }
- presentableError = [NSError
mutableLocalErrorWithCode:kBDSKUnknownError
localizedDescription:NSLocalizedString(@"Incorrect result type", @"error when
pubmed parse fails")];
- [presentableError setValue:NSLocalizedString(@"The server did not
return a recognized data format. This is likely a server problem.", @"error
when pubmed parse fails") forKey:NSLocalizedRecoverySuggestionErrorKey];
- [presentableError embedError:error];
+ [group addPublications:pubs];
}
[[NSFileManager defaultManager] removeFileAtPath:filePath handler:nil];
[filePath release];
filePath = nil;
-
- if (failedDownload)
- [NSApp presentError:presentableError];
-
- [group addPublications:pubs];
}
- (void)download:(NSURLDownload *)download didFailWithError:(NSError *)error
Added: trunk/bibdesk/BDSKPubMedXMLParser.h
===================================================================
--- trunk/bibdesk/BDSKPubMedXMLParser.h (rev 0)
+++ trunk/bibdesk/BDSKPubMedXMLParser.h 2009-05-06 14:32:28 UTC (rev 15277)
@@ -0,0 +1,50 @@
+//
+// BDSKPubMedXMLParser.h
+// Bibdesk
+//
+// Created by Adam Maxwell on 5/2/09.
+/*
+ This software is Copyright (c) 2009
+ Adam Maxwell. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ - Neither the name of Adam Maxwell nor the names of any
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import <Cocoa/Cocoa.h>
+#import "BDSKStringParser.h"
+
+@interface BDSKPubMedXMLParser : BDSKStringParser {
+
+}
+
++ (BOOL)canParseString:(NSString *)string;
++ (NSArray *)itemsFromString:(NSString *)itemString error:(NSError **)outError;
++ (NSArray *)itemsFromData:(NSData *)itemData error:(NSError **)outError;
+
+@end
Added: trunk/bibdesk/BDSKPubMedXMLParser.m
===================================================================
--- trunk/bibdesk/BDSKPubMedXMLParser.m (rev 0)
+++ trunk/bibdesk/BDSKPubMedXMLParser.m 2009-05-06 14:32:28 UTC (rev 15277)
@@ -0,0 +1,373 @@
+//
+// BDSKPubMedXMLParser.m
+// Bibdesk
+//
+// Created by Adam Maxwell on 5/2/09.
+/*
+ This software is Copyright (c) 2009
+ Adam Maxwell. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ - Neither the name of Adam Maxwell nor the names of any
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import "BDSKPubMedXMLParser.h"
+#import "BibItem.h"
+
+/*
+ See documentation at
+
+ http://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html
+
+ */
+
+@interface NSXMLNode (BDSKPubMedExtensions)
+- (NSXMLNode *)firstNodeForXPath:(NSString *)xpath;
+@end
+
+@implementation NSXMLNode (BDSKPubMedExtensions)
+
+- (NSXMLNode *)firstNodeForXPath:(NSString *)xpath;
+{
+ NSError *error;
+ NSArray *nodes = [self nodesForXPath:xpath error:&error];
+#ifdef OMNI_ASSERTIONS_ON
+ if (nil == nodes) NSLog(@"Error for XPath %@: %@", xpath, error);
+#endif
+ return [nodes count] ? [nodes objectAtIndex:0] : nil;
+}
+
+@end
+
+
+@implementation BDSKPubMedXMLParser
+
+static bool _useTitlecase = true;
+#ifdef OMNI_ASSERTIONS_ON
+static bool _addXMLStringToAnnote = true;
+#else
+static bool _addXMLStringToAnnote = false;
+#endif
+
++ (void)initialize
+{
+ // this is messy, but may be useful for debugging
+ if ([[NSUserDefaults standardUserDefaults]
boolForKey:@"BDSKAddPubMedXMLStringToAnnote"])
+ _addXMLStringToAnnote = true;
+ // try to allow for common titlecasing in PubMed (which gives us sentence
case journal titles)
+ if ([[NSUserDefaults standardUserDefaults]
boolForKey:@"BDSKDisablePubMedXMLTitleCasing"])
+ _useTitlecase = false;
+}
+
++ (BOOL)canParseString:(NSString *)string;
+{
+ return [string rangeOfString:@"<!DOCTYPE PubmedArticleSet"
options:NSCaseInsensitiveSearch].length > 0;
+}
+
+// convenience to avoid creating a local variable and checking it each time
+static inline void addStringToDictionaryIfNotNil(NSString *value, NSString
*key, NSMutableDictionary *dict)
+{
+ if (value) [dict setObject:[value stringByBackslashEscapingTeXSpecials]
forKey:key];
+}
+
+// convenience to add the string value of a node; only adds if non-nil
+static inline void addStringValueOfNodeForField(NSXMLNode *child, NSString
*field, NSMutableDictionary *pubFields)
+{
+ addStringToDictionaryIfNotNil([child stringValue], field, pubFields);
+}
+
++ (void)_addPubDateNode:(NSXMLNode *)dateNode
toDictionary:(NSMutableDictionary *)pubFields
+{
+ NSEnumerator *compEnum = [[dateNode children] objectEnumerator];
+ NSXMLNode *comp;
+
+ while (comp = [compEnum nextObject]) {
+
+ if ([[comp name] isEqualToString:@"Year"]) {
+ addStringValueOfNodeForField(comp, BDSKYearString, pubFields);
+ }
+ else if ([[comp name] isEqualToString:@"Month"]) {
+ addStringValueOfNodeForField(comp, BDSKMonthString, pubFields);
+ }
+ else if ([[comp name] isEqualToString:@"MedlineDate"]) {
+ // this is a fallback mechanism
+ addStringValueOfNodeForField(comp, BDSKDateString, pubFields);
+
+ // first 4 digits should be the year
+ NSScanner *scanner = [[NSScanner alloc] initWithString:[comp
stringValue]];
+ NSString *year;
+ if ([scanner scanCharactersFromSet:[NSCharacterSet
decimalDigitCharacterSet] intoString:&year] && [year length] == 4)
+ addStringToDictionaryIfNotNil(year, BDSKYearString, pubFields);
+ [scanner release];
+ }
+ }
+}
+
++ (void)_addJournalNode:(NSXMLNode *)journalNode
toDictionary:(NSMutableDictionary *)pubFields
+{
+ /*
+ <Journal>
+ <ISSN IssnType="Print">1821-6404</ISSN>
+ <JournalIssue CitedMedium="Print">
+ <Volume>10</Volume>
+ <Issue>4</Issue>
+ <PubDate>
+ <Year>2008</Year>
+ <Month>Oct</Month>
+ </PubDate>
+ </JournalIssue>
+ <Title>Tanzania journal of health research</Title>
+ </Journal>
+ */
+
+ NSEnumerator *nodeEnum = [[journalNode children] objectEnumerator];
+ NSXMLNode *node;
+
+ while (node = [nodeEnum nextObject]) {
+
+ NSString *nodeName = [node name];
+
+ if ([nodeName isEqualToString:@"Title"]) {
+ addStringToDictionaryIfNotNil(_useTitlecase ? [[node stringValue]
titlecaseString] : [node stringValue], BDSKJournalString, pubFields);
+ }
+ else if ([nodeName isEqualToString:@"JournalIssue"]) {
+
+ NSEnumerator *childEnum = [[node children] objectEnumerator];
+ NSXMLNode *child;
+
+ while (child = [childEnum nextObject]) {
+ NSString *childName = [child name];
+ if ([childName isEqualToString:@"Volume"])
addStringValueOfNodeForField(child, BDSKVolumeString, pubFields);
+ else if ([childName isEqualToString:@"Issue"])
addStringValueOfNodeForField(child, BDSKNumberString, pubFields);
+ else if ([childName isEqualToString:@"PubDate"]) [self
_addPubDateNode:child toDictionary:pubFields];
+ }
+ }
+ }
+}
+
++ (void)_addAuthorListNode:(NSXMLNode *)authorListNode
toDictionary:(NSMutableDictionary *)pubFields
+{
+ /*
+ <AuthorList CompleteYN="Y">
+ <Author ValidYN="Y">
+ <LastName>Ezekiel</LastName>
+ <ForeName>M J</ForeName>
+ <Initials>MJ</Initials>
+ <Suffix>Jr</Suffix>
+ </Author>
+ </AuthorList>
+
+ NB: ForeName is the only key documented, but testing reveals FirstName
may be used instead.
+ nlmcommon_090101.dtd sez MiddleName may appear with FirstName as well.
+
+ CollectiveName is for a corporate name, although it may be interspersed
with other authors.
+ Enclose these in braces as a last name only. See PMID 18084292 for an
example.
+
+ */
+
+ NSMutableArray *authorNames = [NSMutableArray new];
+ NSEnumerator *authorEnum = [[authorListNode children] objectEnumerator];
+ NSXMLNode *authorNode;
+
+ while (authorNode = [authorEnum nextObject]) {
+
+ // this should always be true...
+ if ([[authorNode name] isEqualToString:@"Author"]) {
+
+ NSString *lastName = nil;
+ NSString *firstName = nil;
+ NSString *middleName = nil;
+ NSString *suffix = nil;
+ NSEnumerator *nameEnum = [[authorNode children] objectEnumerator];
+ NSXMLNode *name;
+
+ while (name = [nameEnum nextObject]) {
+
+ NSString *nodeName = [name name];
+
+ if ([nodeName isEqualToString:@"LastName"]) lastName = [name
stringValue];
+ else if ([nodeName isEqualToString:@"ForeName"] || [nodeName
isEqualToString:@"FirstName"]) firstName = [name stringValue];
+ else if ([nodeName isEqualToString:@"Suffix"]) suffix = [name
stringValue];
+ else if ([nodeName isEqualToString:@"MiddleName"]) middleName
= [name stringValue];
+ else if ([nodeName isEqualToString:@"CollectiveName"])
lastName = [NSString stringWithFormat:@"{%@}", [name stringValue]];
+ }
+
+ // normalized form for btparse: von Last, Jr, First Middle
+ NSMutableString *fullName = [NSMutableString new];
+ if (lastName) {
+ [fullName appendString:lastName];
+ }
+ if (suffix) {
+ if ([fullName isEqualToString:@""] == NO)
+ [fullName appendString:@", "];
+ [fullName appendString:suffix];
+ }
+ if (firstName) {
+ if ([fullName isEqualToString:@""] == NO)
+ [fullName appendString:@", "];
+ [fullName appendString:firstName];
+ }
+ if (middleName) {
+ // no comma for a middle name
+ if ([fullName isEqualToString:@""] == NO)
+ [fullName appendString:@" "];
+ // typically just an initial, but the .bst will handle any dot
for abbreviation
+ [fullName appendString:middleName];
+ }
+ [authorNames addObject:fullName];
+ [fullName release];
+ }
+ else {
+ NSLog(@"Unknown node name %@ in %@", [authorNode name],
authorListNode);
+ }
+ }
+
+ if ([authorNames count])
+ addStringToDictionaryIfNotNil([authorNames componentsJoinedByString:@"
and "], BDSKAuthorString, pubFields);
+ [authorNames release];
+}
+
++ (void)_addMeshNode:(NSXMLNode *)listNode toDictionary:(NSMutableDictionary
*)pubFields
+{
+ if ([[listNode children] count] == 0)
+ return;
+
+ NSMutableString *meshString = [NSMutableString new];
+ NSString *keywordSeparator = [[NSUserDefaults standardUserDefaults]
objectForKey:BDSKDefaultGroupFieldSeparatorKey];
+ NSEnumerator *meshEnum = [[listNode children] objectEnumerator];
+ NSXMLNode *meshNode;
+
+ while (meshNode = [meshEnum nextObject]) {
+ if ([[meshNode name] isEqualToString:@"MeshHeading"]) {
+ NSEnumerator *headingEnum = [[meshNode children] objectEnumerator];
+ NSXMLNode *headingNode;
+
+ while (headingNode = [headingEnum nextObject]) {
+
+ // add descriptor name and ignore qualifier name
+ if ([[headingNode name] isEqualToString:@"DescriptorName"]) {
+ if ([meshString length])
+ [meshString appendString:keywordSeparator];
+ [meshString appendString:[headingNode stringValue]];
+ }
+ }
+ }
+ }
+ [pubFields setObject:meshString forKey:@"Mesh"];
+ [meshString release];
+}
+
++ (void)_addKeywordNode:(NSXMLNode *)listNode
toDictionary:(NSMutableDictionary *)pubFields
+{
+ if ([[listNode children] count] == 0)
+ return;
+
+ NSMutableString *keywordString = [NSMutableString new];
+ NSString *keywordSeparator = [[NSUserDefaults standardUserDefaults]
objectForKey:BDSKDefaultGroupFieldSeparatorKey];
+ NSEnumerator *keywordEnum = [[listNode children] objectEnumerator];
+ NSXMLNode *keywordNode;
+
+ while (keywordNode = [keywordEnum nextObject]) {
+
+ if ([[keywordNode name] isEqualToString:@"Keyword"]) {
+ if ([keywordString length])
+ [keywordString appendString:keywordSeparator];
+ [keywordString appendString:[keywordNode stringValue]];
+ }
+ }
+ [pubFields setObject:keywordString forKey:BDSKKeywordsString];
+ [keywordString release];
+}
+
++ (NSArray *)_itemsFromDocument:(NSXMLDocument *)doc error:(NSError
**)outError;
+{
+ NSArray *articles = [doc nodesForXPath:@"//PubmedArticle" error:outError];
+ NSMutableArray *pubs = [NSMutableArray array];
+ NSEnumerator *articleEnum = [articles objectEnumerator];
+ NSXMLNode *article;
+
+ while (article = [articleEnum nextObject]) {
+
+ NSXMLNode *citation = [article
firstNodeForXPath:@"./MedlineCitation"];
+ NSMutableDictionary *pubFields = [NSMutableDictionary new];
+
+ [self _addJournalNode:[citation
firstNodeForXPath:@"./Article/Journal"] toDictionary:pubFields];
+ [self _addAuthorListNode:[citation
firstNodeForXPath:@"./Article/AuthorList"] toDictionary:pubFields];
+
+ // ex. PMID 16187791
+ [self _addMeshNode:[citation firstNodeForXPath:@"./MeshHeadingList"]
toDictionary:pubFields];
+ [self _addKeywordNode:[citation firstNodeForXPath:@"./KeywordList"]
toDictionary:pubFields];
+
+ NSString *title = [[citation
firstNodeForXPath:@"./Article/ArticleTitle"] stringValue];
+ addStringToDictionaryIfNotNil([title stringByRemovingSuffix:@"."],
BDSKTitleString, pubFields);
+ addStringValueOfNodeForField([citation
firstNodeForXPath:@"./Article/Abstract/AbstractText"], BDSKAbstractString,
pubFields);
+ addStringValueOfNodeForField([citation
firstNodeForXPath:@"./Article/Pagination/MedlinePgn"], BDSKPagesString,
pubFields);
+ addStringValueOfNodeForField([citation firstNodeForXPath:@"./PMID"],
@"Pmid", pubFields);
+
+ // grab the DOI if available
+ NSArray *articleIDs = [article
nodesForXPath:@"./PubmedData/ArticleIdList/ArticleId" error:NULL];
+ NSEnumerator *articleIDEnum = [articleIDs objectEnumerator];
+ NSXMLElement *articleID;
+
+ while (articleID = [articleIDEnum nextObject]) {
+ if ([articleID kind] == NSXMLElementKind && [[[articleID
attributeForName:@"IdType"] stringValue] isEqualToString:@"doi"])
+ addStringValueOfNodeForField(articleID, BDSKDoiString,
pubFields);
+ }
+
+
+ // for debugging
+ if (_addXMLStringToAnnote) addStringToDictionaryIfNotNil([article
XMLStringWithOptions:NSXMLNodePrettyPrint], BDSKAnnoteString, pubFields);
+
+ BibItem *pub = [[BibItem allocWithZone:[self zone]]
initWithType:BDSKArticleString
+
fileType:BDSKBibtexString
+ citeKey:nil
+
pubFields:pubFields
+ isNew:YES];
+ [pubs addObject:pub];
+ [pub release];
+ [pubFields release];
+ }
+
+ return pubs;
+}
+
++ (NSArray *)itemsFromString:(NSString *)itemString error:(NSError **)outError;
+{
+ NSXMLDocument *doc = [[NSXMLDocument allocWithZone:[self zone]]
initWithXMLString:itemString options:NSXMLNodeOptionsNone error:outError];
+ doc = [doc autorelease];
+ return doc ? [self _itemsFromDocument:doc error:outError] : nil;
+}
+
++ (NSArray *)itemsFromData:(NSData *)itemData error:(NSError **)outError;
+{
+ NSXMLDocument *doc = [[NSXMLDocument allocWithZone:[self zone]]
initWithData:itemData options:NSXMLNodeOptionsNone error:outError];
+ doc = [doc autorelease];
+ return doc ? [self _itemsFromDocument:doc error:outError] : nil;
+}
+
+@end
Modified: trunk/bibdesk/BDSKStringParser.h
===================================================================
--- trunk/bibdesk/BDSKStringParser.h 2009-05-06 13:39:59 UTC (rev 15276)
+++ trunk/bibdesk/BDSKStringParser.h 2009-05-06 14:32:28 UTC (rev 15277)
@@ -51,7 +51,8 @@
BDSKDublinCoreStringType,
BDSKReferStringType,
BDSKMODSStringType,
- BDSKSciFinderStringType
+ BDSKSciFinderStringType,
+ BDSKPubMedXMLStringType
};
// these methods are valid for the abstract class, and should not be used or
defined for a concrete subclass
Modified: trunk/bibdesk/BDSKStringParser.m
===================================================================
--- trunk/bibdesk/BDSKStringParser.m 2009-05-06 13:39:59 UTC (rev 15276)
+++ trunk/bibdesk/BDSKStringParser.m 2009-05-06 14:32:28 UTC (rev 15277)
@@ -49,6 +49,7 @@
#import "BDSKReferParser.h"
#import "BDSKMODSParser.h"
#import "BDSKSciFinderParser.h"
+#import "BDSKPubMedXMLParser.h"
#import "BDSKRuntime.h"
@implementation BDSKStringParser
@@ -87,6 +88,9 @@
case BDSKSciFinderStringType:
parserClass = [BDSKSciFinderParser class];
break;
+ case BDSKPubMedXMLStringType:
+ parserClass = [BDSKPubMedXMLParser class];
+ break;
default:
parserClass = Nil;
}
@@ -141,28 +145,30 @@
@implementation NSString (BDSKStringParserExtensions)
- (NSInteger)contentStringType{
- if([BDSKBibTeXParser canParseString:self])
- return BDSKBibTeXStringType;
- if([BDSKReferenceMinerParser canParseString:self])
- return BDSKReferenceMinerStringType;
- if([BDSKPubMedParser canParseString:self])
- return BDSKPubMedStringType;
- if([BDSKRISParser canParseString:self])
- return BDSKRISStringType;
- if([BDSKMARCParser canParseString:self])
- return BDSKMARCStringType;
- if([BDSKJSTORParser canParseString:self])
- return BDSKJSTORStringType;
- if([BDSKWebOfScienceParser canParseString:self])
- return BDSKWOSStringType;
- if([BDSKBibTeXParser canParseStringAfterFixingKeys:self])
- return BDSKNoKeyBibTeXStringType;
+ if([BDSKBibTeXParser canParseString:self])
+ return BDSKBibTeXStringType;
+ if([BDSKReferenceMinerParser canParseString:self])
+ return BDSKReferenceMinerStringType;
+ if([BDSKPubMedParser canParseString:self])
+ return BDSKPubMedStringType;
+ if([BDSKRISParser canParseString:self])
+ return BDSKRISStringType;
+ if([BDSKMARCParser canParseString:self])
+ return BDSKMARCStringType;
+ if([BDSKJSTORParser canParseString:self])
+ return BDSKJSTORStringType;
+ if([BDSKWebOfScienceParser canParseString:self])
+ return BDSKWOSStringType;
+ if([BDSKBibTeXParser canParseStringAfterFixingKeys:self])
+ return BDSKNoKeyBibTeXStringType;
if([BDSKReferParser canParseString:self])
return BDSKReferStringType;
if([BDSKMODSParser canParseString:self])
return BDSKMODSStringType;
if([BDSKSciFinderParser canParseString:self])
return BDSKSciFinderStringType;
+ if([BDSKPubMedXMLParser canParseString:self])
+ return BDSKPubMedXMLStringType;
// don't check DC, as the check is too unreliable
return BDSKUnknownStringType;
}
Modified: trunk/bibdesk/BibItem_PubMedLookup.m
===================================================================
--- trunk/bibdesk/BibItem_PubMedLookup.m 2009-05-06 13:39:59 UTC (rev
15276)
+++ trunk/bibdesk/BibItem_PubMedLookup.m 2009-05-06 14:32:28 UTC (rev
15277)
@@ -39,13 +39,14 @@
#import "BibItem_PubMedLookup.h"
#import <WebKit/WebKit.h>
#import "BDSKStringParser.h"
+#import "BDSKPubMedXMLParser.h"
#import <AGRegex/AGRegex.h>
#import "NSURL_BDSKExtensions.h"
#import "NSString_BDSKExtensions.h"
#import "PDFMetadata.h"
@interface BDSKPubMedLookupHelper : NSObject
-+ (NSString *)referenceForPubMedSearchTerm:(NSString *)searchTerm;
++ (NSData *)xmlReferenceDataForPMID:(NSString *)searchTerm;
@end
@implementation NSString (PubMedLookup)
@@ -345,8 +346,8 @@
+ (id)itemWithPubMedSearchTerm:(NSString *)searchTerm;
{
- NSString *string = [BDSKPubMedLookupHelper
referenceForPubMedSearchTerm:searchTerm];
- return string ? [[BDSKStringParser itemsFromString:string
ofType:BDSKUnknownStringType error:NULL] lastObject] : nil;
+ NSData *data = [BDSKPubMedLookupHelper xmlReferenceDataForPMID:searchTerm];
+ return [data length] ? [[BDSKPubMedXMLParser itemsFromData:data
error:NULL] lastObject] : nil;
}
@end
@@ -381,12 +382,12 @@
return canConnect;
}
-+ (NSString *)referenceForPubMedSearchTerm:(NSString *) searchTerm;
++ (NSData *)xmlReferenceDataForPMID:(NSString *)searchTerm;
{
NSParameterAssert(searchTerm != nil);
- NSString *toReturn = nil;
-
+ NSData *toReturn = nil;
+
if ([self canConnect] == NO)
return toReturn;
@@ -419,29 +420,12 @@
if ([count intValue] == 1) {
// get the first result (zero-based indexing)
- NSString *efetch = [[[self class] baseURLString]
stringByAppendingFormat:@"/efetch.fcgi?rettype=medline&retmode=text&retstart=0&retmax=1&db=pubmed&query_key=%@&WebEnv=%@&tool=bibdesk",
queryKey, webEnv];
+ NSString *efetch = [[[self class] baseURLString]
stringByAppendingFormat:@"/efetch.fcgi?rettype=abstract&retmode=xml&retstart=0&retmax=1&db=pubmed&query_key=%@&WebEnv=%@&tool=bibdesk",
queryKey, webEnv];
theURL = [NSURL URLWithString:efetch];
BDSKPOSTCONDITION(theURL);
request = [NSURLRequest requestWithURL:theURL
cachePolicy:NSURLRequestUseProtocolCachePolicy timeoutInterval:1.0];
- NSData *efetchResult = [NSURLConnection
sendSynchronousRequest:request returningResponse:&response error:&error];
-
- if (efetchResult) {
-
- // try to get encoding from the http headers; returned nil
when I tried
- NSString *encodingName = [response textEncodingName];
- NSStringEncoding encoding = encodingName ?
CFStringConvertEncodingToNSStringEncoding(CFStringConvertIANACharSetNameToEncoding((CFStringRef)encodingName))
: kCFStringEncodingInvalidId;
-
- if (encoding != kCFStringEncodingInvalidId)
- toReturn = [[NSString alloc] initWithData:efetchResult
encoding:encoding];
- else
- toReturn = [[NSString alloc] initWithData:efetchResult
encoding:NSUTF8StringEncoding];
-
- if (nil == toReturn)
- toReturn = [[NSString alloc] initWithData:efetchResult
encoding:NSISOLatin1StringEncoding];
-
- [toReturn autorelease];
- }
+ toReturn = [NSURLConnection sendSynchronousRequest:request
returningResponse:&response error:&error];
}
[document release];
}
Modified: trunk/bibdesk/Bibdesk.xcodeproj/project.pbxproj
===================================================================
--- trunk/bibdesk/Bibdesk.xcodeproj/project.pbxproj 2009-05-06 13:39:59 UTC
(rev 15276)
+++ trunk/bibdesk/Bibdesk.xcodeproj/project.pbxproj 2009-05-06 14:32:28 UTC
(rev 15277)
@@ -188,6 +188,7 @@
CE6DACC50A503ECF00123185 /* BDSKToolbarItem.m in Sources */ =
{isa = PBXBuildFile; fileRef = CE6DACC30A503ECF00123185 /* BDSKToolbarItem.m
*/; };
CE6DCA230D6A0237003A072F /* BDSKCondition+Scripting.m in
Sources */ = {isa = PBXBuildFile; fileRef = CE6DCA210D6A0237003A072F /*
BDSKCondition+Scripting.m */; };
CE6FB32309DFFCB5005E3E14 /* BDSKSharingBrowser.m in Sources */
= {isa = PBXBuildFile; fileRef = CE6FB32109DFFCB5005E3E14 /*
BDSKSharingBrowser.m */; };
+ CE73BAD10FB1CE9600A43716 /* BDSKPubMedXMLParser.m in Sources */
= {isa = PBXBuildFile; fileRef = CE73BACF0FB1CE9600A43716 /*
BDSKPubMedXMLParser.m */; };
CE7596000ADDB0E1009C1329 /* BDSKContainerView.m in Sources */ =
{isa = PBXBuildFile; fileRef = CE7595FE0ADDB0E0009C1329 /* BDSKContainerView.m
*/; };
CE7611530EA49B6E00301E45 /* BDSKPrintableView.m in Sources */ =
{isa = PBXBuildFile; fileRef = CE7611510EA49B6E00301E45 /* BDSKPrintableView.m
*/; };
CE76A5430B430EA500E61066 /* BDSKSearchGroupSheet.nib in
Resources */ = {isa = PBXBuildFile; fileRef = CE76A53D0B430EA500E61066 /*
BDSKSearchGroupSheet.nib */; };
@@ -1148,6 +1149,8 @@
CE6DCA210D6A0237003A072F /* BDSKCondition+Scripting.m */ = {isa
= PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc;
path = "BDSKCondition+Scripting.m"; sourceTree = "<group>"; };
CE6FB32009DFFCB5005E3E14 /* BDSKSharingBrowser.h */ = {isa =
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path =
BDSKSharingBrowser.h; sourceTree = "<group>"; };
CE6FB32109DFFCB5005E3E14 /* BDSKSharingBrowser.m */ = {isa =
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path
= BDSKSharingBrowser.m; sourceTree = "<group>"; };
+ CE73BACE0FB1CE9600A43716 /* BDSKPubMedXMLParser.h */ = {isa =
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path =
BDSKPubMedXMLParser.h; sourceTree = "<group>"; };
+ CE73BACF0FB1CE9600A43716 /* BDSKPubMedXMLParser.m */ = {isa =
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path
= BDSKPubMedXMLParser.m; sourceTree = "<group>"; };
CE7595FD0ADDB0E0009C1329 /* BDSKContainerView.h */ = {isa =
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path =
BDSKContainerView.h; sourceTree = "<group>"; };
CE7595FE0ADDB0E0009C1329 /* BDSKContainerView.m */ = {isa =
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path
= BDSKContainerView.m; sourceTree = "<group>"; };
CE7611500EA49B6E00301E45 /* BDSKPrintableView.h */ = {isa =
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path =
BDSKPrintableView.h; sourceTree = "<group>"; };
@@ -2331,6 +2334,7 @@
6C5DE3E60F8FC33B00E02D5F /*
BDSKMathSiteParser.m */,
F9D0E5350BF92768001C6C22 /* BDSKMODSParser.m */,
F9022CA70758038000C3F701 /* BDSKPubMedParser.m
*/,
+ CE73BACF0FB1CE9600A43716 /*
BDSKPubMedXMLParser.m */,
F940D1080B5568E400B5917A /* BDSKReferParser.m
*/,
CE601A4D0AF4C488000B5680 /*
BDSKReferenceMinerParser.m */,
CE600E2E0AF405D6000B5680 /* BDSKRISParser.m */,
@@ -2709,6 +2713,7 @@
CE7611500EA49B6E00301E45 /* BDSKPrintableView.h
*/,
CE565BBD0AEF848B002F0A24 /*
BDSKPublicationsArray.h */,
F9022CA60758038000C3F701 /* BDSKPubMedParser.h
*/,
+ CE73BACE0FB1CE9600A43716 /*
BDSKPubMedXMLParser.h */,
CE392F5B08D04083001CEAC8 /* BDSKRatingButton.h
*/,
CE392ED208D034E4001CEAC8 /*
BDSKRatingButtonCell.h */,
CE95A5790A88883300334DFA /*
BDSKReadMeController.h */,
@@ -3937,6 +3942,7 @@
6CD26A240F928EEE0089FDFD /*
BDSKBibDeskProtocol.m in Sources */,
CEDA7E0D0F96497B00F72C0A /*
NSAlert_BDSKExtensions.m in Sources */,
6CAEE4CD0F98EC63009EA5FE /* BDSKCOinSParser.m
in Sources */,
+ CE73BAD10FB1CE9600A43716 /*
BDSKPubMedXMLParser.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
The NEW KODAK i700 Series Scanners deliver under ANY circumstances! Your
production scanning environment may not be a perfect world - but thanks to
Kodak, there's a perfect scanner to get the job done! With the NEW KODAK i700
Series Scanner you'll get full speed at 300 dpi even with all image
processing features enabled. http://p.sf.net/sfu/kodak-com
_______________________________________________
Bibdesk-commit mailing list
Bibdesk-commit@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit