Revision: 11119
http://bibdesk.svn.sourceforge.net/bibdesk/?rev=11119&view=rev
Author: amaxwell
Date: 2007-09-21 16:03:20 -0700 (Fri, 21 Sep 2007)
Log Message:
-----------
Performance improvement for format sniffing: only use the first 100 characters
when normalizing spaces and line breaks, since it's pretty expensive.
Modified Paths:
--------------
trunk/bibdesk/BDSKBibTeXParser.m
trunk/bibdesk/BDSKMARCParser.m
trunk/bibdesk/BDSKReferenceMinerParser.m
Modified: trunk/bibdesk/BDSKBibTeXParser.m
===================================================================
--- trunk/bibdesk/BDSKBibTeXParser.m 2007-09-21 22:22:34 UTC (rev 11118)
+++ trunk/bibdesk/BDSKBibTeXParser.m 2007-09-21 23:03:20 UTC (rev 11119)
@@ -105,14 +105,13 @@
AGRegex *btRegex = [[AGRegex alloc] initWithPattern:/* spaces */
@"^[ \\t]*"
/* type of item */
@"@[[:alpha:]]+[ \\t]*[{(]"
- /* spaces */ @"[ \\n\\t]*"
+ /* spaces */ @"[ \\n\\r\\t]*"
/* cite key */
@"[a-zA-Z0-9\\.,:/*!&$^_-]+?"
- /* spaces */ @"[ \\n\\t]*,"
+ /* spaces */ @"[ \\n\\r\\t]*,"
options:AGRegexMultiline];
- // AGRegex doesn't recognize \r as a $, so we normalize it first (bug #1420791)
- NSString *normalizedString = [string
stringByNormalizingSpacesAndLineBreaks];
- BOOL found = ([btRegex findInString:normalizedString] != nil);
+ // AGRegex doesn't recognize \r as a $ (bug #1420791), but normalizing is slow; use \r\n in regex instead
+ BOOL found = ([btRegex findInString:string] != nil);
[btRegex release];
return found;
}
@@ -121,10 +120,7 @@
// ^(@[[:alpha:]]+{),?$ will grab either "@type{,eol" or "@type{eol", which is what we get from Bookends and EndNote, respectively.
// same regex used in -[NSString stringWithPhoneyCiteKeys:]
AGRegex *theRegex = [[AGRegex alloc] initWithPattern:@"^[ \\t]*@[[:alpha:]]+[ \\t]*{[ \\t]*,?$" options:AGRegexMultiline];
-
- // AGRegex doesn't recognize \r as a $, so we normalize it first (bug #1420791)
- NSString *normalizedString = [string
stringByNormalizingSpacesAndLineBreaks];
- BOOL found = ([theRegex findInString:normalizedString] != nil);
+ BOOL found = ([theRegex findInString:string] != nil);
[theRegex release];
return found;
Modified: trunk/bibdesk/BDSKMARCParser.m
===================================================================
--- trunk/bibdesk/BDSKMARCParser.m 2007-09-21 22:22:34 UTC (rev 11118)
+++ trunk/bibdesk/BDSKMARCParser.m 2007-09-21 23:03:20 UTC (rev 11119)
@@ -402,14 +402,14 @@
- (BOOL)isFormattedMARCString{
AGRegex *regex = [AGRegex regexWithPattern:@"^[ \t]*LDR[ \t]+[ \\-0-9]{5}[a-z]{3}[ \\-a][ a\\-0-9]22[ \\-0-9]{5}[ \\-1-8uz][ \\-a-z][ \\-r]45[ 0A-Z]0\n{1,2}[ \t]*[0-9]{3}[ \t]+" options:AGRegexMultiline];
-
- return nil != [regex findInString:[self
stringByNormalizingSpacesAndLineBreaks]];
+ unsigned maxLen = MIN([self length], (unsigned)100);
+ return nil != [regex findInString:[[self substringToIndex:maxLen]
stringByNormalizingSpacesAndLineBreaks]];
}
- (BOOL)isMARCXMLString{
AGRegex *regex = [AGRegex regexWithPattern:@"<record( xmlns=\"[^<>\"]*\")?>\n *<leader>[ 0-9]{5}[a-z]{3}[ a]{2}22[ 0-9]{5}[ 1-8uz][ a-z][ r]45[ 0A-Z]0</leader>\n *<controlfield tag=\"00[0-9]\">"];
-
- return nil != [regex findInString:[self
stringByNormalizingSpacesAndLineBreaks]];
+ unsigned maxLen = MIN([self length], (unsigned)100);
+ return nil != [regex findInString:[[self substringToIndex:maxLen]
stringByNormalizingSpacesAndLineBreaks]];
}
- (NSString *)stringByFixingFormattedMARCStart{
Modified: trunk/bibdesk/BDSKReferenceMinerParser.m
===================================================================
--- trunk/bibdesk/BDSKReferenceMinerParser.m 2007-09-21 22:22:34 UTC (rev 11118)
+++ trunk/bibdesk/BDSKReferenceMinerParser.m 2007-09-21 23:03:20 UTC (rev 11119)
@@ -56,7 +56,7 @@
@implementation BDSKReferenceMinerParser
+ (BOOL)canParseString:(NSString *)string{
- string = [string stringByNormalizingSpacesAndLineBreaks];
+ string = [[string substringToIndex:MIN([string length], (unsigned)100)]
stringByNormalizingSpacesAndLineBreaks];
return [string isRefMinerPubMedString] || [string isRefMinerLoCString] ||
[string isRefMinerAmazonString];
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit