Revision: 11119
          http://bibdesk.svn.sourceforge.net/bibdesk/?rev=11119&view=rev
Author:   amaxwell
Date:     2007-09-21 16:03:20 -0700 (Fri, 21 Sep 2007)

Log Message:
-----------
Performance improvement for format sniffing: only use the first 100 characters 
when normalizing spaces and line breaks, since normalization is pretty expensive.

Modified Paths:
--------------
    trunk/bibdesk/BDSKBibTeXParser.m
    trunk/bibdesk/BDSKMARCParser.m
    trunk/bibdesk/BDSKReferenceMinerParser.m

Modified: trunk/bibdesk/BDSKBibTeXParser.m
===================================================================
--- trunk/bibdesk/BDSKBibTeXParser.m    2007-09-21 22:22:34 UTC (rev 11118)
+++ trunk/bibdesk/BDSKBibTeXParser.m    2007-09-21 23:03:20 UTC (rev 11119)
@@ -105,14 +105,13 @@
 
     AGRegex *btRegex = [[AGRegex alloc] initWithPattern:/* spaces       */ 
@"^[ \\t]*"
                                                         /* type of item */ 
@"@[[:alpha:]]+[ \\t]*[{(]" 
-                                                        /* spaces       */ @"[ 
\\n\\t]*" 
+                                                        /* spaces       */ @"[ 
\\n\\r\\t]*" 
                                                         /* cite key     */ 
@"[a-zA-Z0-9\\.,:/*!&$^_-]+?" 
-                                                        /* spaces       */ @"[ 
\\n\\t]*," 
+                                                        /* spaces       */ @"[ 
\\n\\r\\t]*," 
                                                 options:AGRegexMultiline];
     
-    // AGRegex doesn't recognize \r as a $, so we normalize it first (bug 
#1420791)
-    NSString *normalizedString = [string 
stringByNormalizingSpacesAndLineBreaks];
-    BOOL found = ([btRegex findInString:normalizedString] != nil);
+    // AGRegex doesn't recognize \r as a $ (bug #1420791), but normalizing is 
slow; use \r\n in regex instead
+    BOOL found = ([btRegex findInString:string] != nil);
     [btRegex release];
     return found;
 }
@@ -121,10 +120,7 @@
        // ^(@[[:alpha:]]+{),?$ will grab either "@type{,eol" or "@type{eol", 
which is what we get from Bookends and EndNote, respectively.
     // same regex used in -[NSString stringWithPhoneyCiteKeys:]
        AGRegex *theRegex = [[AGRegex alloc]  initWithPattern:@"^[ \\t]*@[[:alpha:]]+[ \\t]*{[ \\t]*,?$" options:AGRegexMultiline];
-    
-    // AGRegex doesn't recognize \r as a $, so we normalize it first (bug 
#1420791)
-    NSString *normalizedString = [string 
stringByNormalizingSpacesAndLineBreaks];
-    BOOL found = ([theRegex findInString:normalizedString] != nil);
+    BOOL found = ([theRegex findInString:string] != nil);
     [theRegex release];
                                
     return found;

Modified: trunk/bibdesk/BDSKMARCParser.m
===================================================================
--- trunk/bibdesk/BDSKMARCParser.m      2007-09-21 22:22:34 UTC (rev 11118)
+++ trunk/bibdesk/BDSKMARCParser.m      2007-09-21 23:03:20 UTC (rev 11119)
@@ -402,14 +402,14 @@
 
 - (BOOL)isFormattedMARCString{
     AGRegex *regex = [AGRegex regexWithPattern:@"^[ \t]*LDR[ \t]+[ 
\\-0-9]{5}[a-z]{3}[ \\-a][ a\\-0-9]22[ \\-0-9]{5}[ \\-1-8uz][ \\-a-z][ \\-r]45[ 
0A-Z]0\n{1,2}[ \t]*[0-9]{3}[ \t]+" options:AGRegexMultiline];
-    
-    return nil != [regex findInString:[self 
stringByNormalizingSpacesAndLineBreaks]];
+    unsigned maxLen = MIN([self length], (unsigned)100);
+    return nil != [regex findInString:[[self substringToIndex:maxLen] 
stringByNormalizingSpacesAndLineBreaks]];
 }
 
 - (BOOL)isMARCXMLString{
     AGRegex *regex = [AGRegex regexWithPattern:@"<record( 
xmlns=\"[^<>\"]*\")?>\n *<leader>[ 0-9]{5}[a-z]{3}[ a]{2}22[ 0-9]{5}[ 1-8uz][ 
a-z][ r]45[ 0A-Z]0</leader>\n *<controlfield tag=\"00[0-9]\">"];
-    
-    return nil != [regex findInString:[self 
stringByNormalizingSpacesAndLineBreaks]];
+    unsigned maxLen = MIN([self length], (unsigned)100);
+    return nil != [regex findInString:[[self substringToIndex:maxLen] 
stringByNormalizingSpacesAndLineBreaks]];
 }
 
 - (NSString *)stringByFixingFormattedMARCStart{

Modified: trunk/bibdesk/BDSKReferenceMinerParser.m
===================================================================
--- trunk/bibdesk/BDSKReferenceMinerParser.m    2007-09-21 22:22:34 UTC (rev 
11118)
+++ trunk/bibdesk/BDSKReferenceMinerParser.m    2007-09-21 23:03:20 UTC (rev 
11119)
@@ -56,7 +56,7 @@
 @implementation BDSKReferenceMinerParser
 
 + (BOOL)canParseString:(NSString *)string{
-    string = [string stringByNormalizingSpacesAndLineBreaks];
+    string = [[string substringToIndex:MIN([string length], (unsigned)100)] 
stringByNormalizingSpacesAndLineBreaks];
     return [string isRefMinerPubMedString] || [string isRefMinerLoCString] || 
[string isRefMinerAmazonString];
 }
 


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit

Reply via email to