Revision: 22523
http://sourceforge.net/p/bibdesk/svn/22523
Author: hofman
Date: 2018-08-24 14:35:53 +0000 (Fri, 24 Aug 2018)
Log Message:
-----------
Get attribute with xpath
Modified Paths:
--------------
trunk/bibdesk/BDSKCOinSParser.m
Modified: trunk/bibdesk/BDSKCOinSParser.m
===================================================================
--- trunk/bibdesk/BDSKCOinSParser.m 2018-08-24 09:37:32 UTC (rev 22522)
+++ trunk/bibdesk/BDSKCOinSParser.m 2018-08-24 14:35:53 UTC (rev 22523)
@@ -64,7 +64,7 @@
@implementation BDSKCOinSParser
-static NSString *hasCOinSNodesXPath = @"./body//span[@class='Z3988' and
string-length(@title)!=0]";
+static NSString *hasCOinSNodesXPath = @"./body//span[@class='Z3988']/@title";
// Claim that it can parse the document if its markup contains the string Z3988.
+ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{
@@ -94,20 +94,17 @@
// Converts a COinS string to a BibItem, using all sorts of heuristics and attempts to interpret the format.
+ (BibItem *) parseCOinSString: (NSString *) COinSString {
- NSString * inputString = COinSString;
+ if ([NSString isEmptyString:COinSString])
+ return nil;
- // the XMLString adds the attribute name
- if ([inputString hasCaseInsensitivePrefix:@"title=\""] && [inputString
hasSuffix:@"\""])
- inputString = [inputString substringWithRange:NSMakeRange(7,
[inputString length] - 8)];
+ // COinS has a laughable 'specification' but even that is quite clear
about spaces being percent escaped to %20. It seems microformat geeks seem to
be even lazier/stupider than the people who failed to write an actual spec and
suffer from the misconception that 'URL Encoding' is the same as 'Percent
Escaping', leading to + being used for a space on many sites. To minimise the
impact of that, replace all + by spaces if no occurrences of %20 are found.
+ if ([COinSString rangeOfString:@"%20"].location == NSNotFound)
+ COinSString = [COinSString
stringByReplacingOccurrencesOfString:@"+" withString:@" "];
+
+ NSArray * components = [COinSString componentsSeparatedByString:@"&"];
- if ([inputString rangeOfString:@"%20"].location == NSNotFound) {
- // COinS has a laughable 'specification' but even that is quite
clear about spaces being percent escaped to %20. It seems microformat geeks
seem to be even lazier/stupider than the people who failed to write an actual
spec and suffer from the misconception that 'URL Encoding' is the same as
'Percent Escaping', leading to + being used for a space on many sites. To
minimise the impact of that, replace all + by spaces if no occurrences of %20
are found.
- inputString = [inputString
stringByReplacingOccurrencesOfString:@"+" withString:@" "];
- }
-
-
- NSArray * components = [inputString componentsSeparatedByString:@"&"];
- if ([components count] < 2 ) { return nil; }
+ if ([components count] < 2 )
+ return nil;
NSMutableDictionary *fieldsDict = [NSMutableDictionary dictionary];
NSMutableArray *files = [NSMutableArray array];
@@ -324,12 +321,10 @@
NSArray *nodes = [[[self domDocument] documentElement]
nodesForXPath:hasCOinSNodesXPath];
NSMutableArray *items = [NSMutableArray arrayWithCapacity:[nodes count]];
+ BibItem *bibItem;
for (DOMNode *node in nodes) {
- NSString *title;
- BibItem *bibItem;
- if ((title = [(DOMElement *)node getAttribute:@"title"]) &&
- (bibItem = [BDSKCOinSParser parseCOinSString:title]))
+ if ((bibItem = [BDSKCOinSParser parseCOinSString:[node nodeValue]]))
[items addObject:bibItem];
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit