Revision: 27769
http://sourceforge.net/p/bibdesk/svn/27769
Author: hofman
Date: 2022-07-26 14:45:40 +0000 (Tue, 26 Jul 2022)
Log Message:
-----------
Only use hashed field to generate unique characters for parsed format when
passed as optional parameter. Optional prefix/suffix is only relevant for
optional 0, when hashed field is not relevant.
Modified Paths:
--------------
trunk/bibdesk/BDSKFormatParser.m
trunk/bibdesk/BibDesk.help/Contents/Resources/en.lproj/bibdesk.texi
Modified: trunk/bibdesk/BDSKFormatParser.m
===================================================================
--- trunk/bibdesk/BDSKFormatParser.m 2022-07-24 23:23:41 UTC (rev 27768)
+++ trunk/bibdesk/BDSKFormatParser.m 2022-07-26 14:45:40 UTC (rev 27769)
@@ -64,12 +64,12 @@
endingWith:(NSString *)endString
addingCharacters:(NSUInteger)number
fromRange:(NSRange)charRange
- prefix:(NSString *)prefix
- suffix:(NSString *)suffix
+ prefix:(NSString *)prefixOrField
+ suffix:(NSString *)suffixOrField
forField:(NSString *)fieldName
ofItem:(id <BDSKParseableItem>)pub
inFolder:(NSURL *)papersFolderURL
- isUniversal:(BOOL)isUniversal;
+ format:(NSString *)format;
+ (BOOL)currentString:(NSString *)suggestion
matchesString:(NSString *)baseStr
@@ -106,7 +106,8 @@
static NSString *validCiteKey(NSString *key, id<BDSKParseableItem> pub);
-static NSUInteger hashedTitleOrDOI(id<BDSKParseableItem> pub, BOOL *isDOI);
+static NSUInteger hashedField(id<BDSKParseableItem> pub, NSString *field);
+static BOOL isUniversal(NSString *format);
static BOOL scanOptArg(NSScanner *scanner, NSString **result, BOOL
*lastCharEscaped);
static BOOL scanSignedDigit(NSScanner *scanner, NSUInteger *resultDigit, BOOL
*resultSign);
@@ -729,15 +730,16 @@
if (NO == [scanner
scanUnsignedInteger:&uniqueNumber]) uniqueNumber = 1;
// prefix/suffix useful only for n=0
// unique generator assumes them to be nil otherwise
- if (uniquePrefix && (uniqueNumber > 0 || [uniquePrefix
length] == 0)) {
- [parsedStr appendString:uniquePrefix];
+ // interpreted as field to hash when n>0
+ if ([uniquePrefix length] == 0)
uniquePrefix = nil;
- }
+ else if (uniqueNumber > 0)
+ uniquePrefix = [uniquePrefix fieldName];
+ if ([uniqueSuffix length] == 0)
+ uniqueSuffix = nil;
+ else if (uniqueNumber > 0)
+ uniqueSuffix = [uniqueSuffix fieldName];
parsedStr = [NSMutableString string];
- if (uniqueSuffix && (uniqueNumber > 0 || [uniqueSuffix
length] == 0)) {
- [parsedStr appendString:uniqueSuffix];
- uniqueSuffix = nil;
- }
}
else {
NSLog(@"Specifier %%%C can only
be used once in the format.", specifier);
@@ -804,8 +806,8 @@
endingWith:parsedStr
addingCharacters:uniqueNumber
fromRange:charRange
- prefix:uniquePrefix
- suffix:uniqueSuffix]) {
+ prefix:uniqueNumber ? nil : uniquePrefix
+ suffix:uniqueNumber ? nil : uniqueSuffix]) {
[parsedStr setString:currentStr];
} else {
[parsedStr setString:[self uniqueString:baseParsedStr
@@ -817,7 +819,7 @@
forField:fieldName
ofItem:pub
inFolder:resolvedPapersFolderURL
- isUniversal:[format
isEqualToString:@"%a1:%Y%u2"]]];
+ format:format]];
}
}
@@ -829,12 +831,12 @@
endingWith:(NSString *)endStr
addingCharacters:(NSUInteger)number
fromRange:(NSRange)charRange
- prefix:(NSString *)prefix
- suffix:(NSString *)suffix
+ prefix:(NSString *)prefixOrField
+ suffix:(NSString *)suffixOrField
forField:(NSString *)fieldName
ofItem:(id <BDSKParseableItem>)pub
inFolder:(NSURL *)papersFolderURL
- isUniversal:(BOOL)isUniversal {
+ format:(NSString *)format {
__block NSString *(^uniqueString)(char *, NSUInteger, NSUInteger) =
^NSString *(char *chars, NSUInteger count, NSUInteger i) {
if (i < count) {
@@ -849,9 +851,9 @@
} else {
NSMutableString *str = [NSMutableString stringWithString:baseStr];
if (count) {
- if (prefix) [str appendString:prefix];
+ if (number == 0 && prefixOrField) [str
appendString:prefixOrField];
[str appendFormat:@"%s", chars];
- if (suffix) [str appendString:suffix];
+ if (number == 0 && suffixOrField) [str
appendString:suffixOrField];
}
[str appendString:endStr];
// validate te result
@@ -872,25 +874,34 @@
memset(fallback, '\0', number + 1);
if (number > 0) {
- // first try unique characters based on the hashed doi or title, to
get a deterministic value
- // consistent with the unique cite key of Papers 2 and 3
- BOOL isDOI = NO;
- NSUInteger hash = hashedTitleOrDOI(pub, &isDOI);
- if (hash != NSNotFound) {
- char chars[number + 1];
- memset(chars, '\0', number + 1);
- while (n-- > 0) {
- chars[n] = charRange.location + (hash % charRange.length);
- if (n > 0) hash /= charRange.length;
+ if (prefixOrField) {
+ // first try unique characters based on the hashed field, to get a
deterministic value
+ // consistent with the universal cite key of Papers 2 and 3
+ for (NSString *hashField in [NSArray
arrayWithObjects:prefixOrField, suffixOrField, nil]) {
+ NSUInteger hash = hashedField(pub, hashField);
+ if (hash == NSNotFound) continue;
+ char chars[number + 1];
+ memset(chars, '\0', number + 1);
+ n = number;
+ while (n-- > 0) {
+ chars[n] = charRange.location + (hash % charRange.length);
+ if (n > 0) hash /= charRange.length;
+ }
+ if (isUniversal(format)) {
+ // try to reproduce Paper's universal cite key for doi or
title
+ if ([hashField isEqualToString:BDSKDoiString])
+ chars[0] = 'b' + (hash % 10);
+ else if ([hashField isEqualToString:BDSKTitleString])
+ chars[0] = 't' + (hash % 4);
+ }
+ if (fallback[0] == '\0')
+ strcpy(fallback, chars);
+ if ((uniqueStr = uniqueString(chars, number, number)))
+ return uniqueStr;
}
- if (isUniversal)
- chars[0] = isDOI ? 'b' + (hash % 10) : 't' + (hash % 4);
- strcpy(fallback, chars);
- if ((uniqueStr = uniqueString(chars, number, number)))
- return uniqueStr;
- } else {
+ }
+ if (fallback[0] == '\0')
memset(fallback, NSMaxRange(charRange) - 1, number);
- }
}
// run along characters to find one that is unique
@@ -1309,9 +1320,29 @@
return nil;
}
-static NSUInteger hashedTitleOrDOI(id<BDSKParseableItem> pub, BOOL *isDOI) {
- NSString *string = [pub title];
- if ([NSString isEmptyString:string] == NO) {
+static BOOL isUniversal(NSString *format) {
+ return [format hasPrefix:@"%a1:%Y%u["] && [format hasSuffix:@"]2"] &&
[format rangeOfCharacterFromSet:[[BDSKTypeManager sharedManager]
invalidCharactersForField:BDSKCiteKeyString] options:0 range:NSMakeRange(9,
[format length] - 11)].location == NSNotFound;
+}
+
+static NSUInteger hashedField(id<BDSKParseableItem> pub, NSString *field) {
+ NSString *string = nil;
+
+ if ([field isEqualToString:BDSKDoiString]) {
+ string = [pub stringValueOfField:BDSKDoiString];
+ BOOL isURL = [string rangeOfString:@"://"].location != NSNotFound;
+ NSUInteger i = [string rangeOfString:@"10."].location;
+ if (i == NSNotFound)
+ return NSNotFound;
+ string = [string substringFromIndex:i];
+ if (isURL)
+ string = [string stringByRemovingPercentEncoding];
+ } else {
+ if ([field isEqualToString:BDSKTitleString])
+ string = [pub title];
+ else
+ string = [pub stringValueOfField:field];
+ if ([NSString isEmptyString:string])
+ return NSNotFound;
static NSCharacterSet *specialCharSet = nil;
static NSCharacterSet *ignoredCharSet = nil;
if (specialCharSet == nil) {
@@ -1332,19 +1363,8 @@
string = [string
stringByCollapsingWhitespaceAndRemovingSurroundingWhitespace];
}
- if ([NSString isEmptyString:string] == NO) {
- *isDOI = NO;
- } else {
- string = [pub stringValueOfField:BDSKDoiString];
- BOOL isURL = [string rangeOfString:@"://"].location != NSNotFound;
- NSUInteger i = [string rangeOfString:@"10."].location;
- if (i == NSNotFound)
- return NSNotFound;
- string = [string substringFromIndex:i];
- if (isURL)
- string = [string stringByRemovingPercentEncoding];
- *isDOI = YES;
- }
+ if ([NSString isEmptyString:string])
+ return NSNotFound;
NSData *data = [string dataUsingEncoding:NSUTF8StringEncoding];
return crc32(0, [data bytes], [data length]);
Modified: trunk/bibdesk/BibDesk.help/Contents/Resources/en.lproj/bibdesk.texi
===================================================================
--- trunk/bibdesk/BibDesk.help/Contents/Resources/en.lproj/bibdesk.texi
2022-07-24 23:23:41 UTC (rev 27768)
+++ trunk/bibdesk/BibDesk.help/Contents/Resources/en.lproj/bibdesk.texi
2022-07-26 14:45:40 UTC (rev 27769)
@@ -3842,13 +3842,13 @@
@tab max length
@item @specparam{%u, [][]1}
@tab Unique lowercase letter (use only once)
-@tab prefix, suffix, (min) number of characters
+@tab prefix or field, suffix or field, (min) number of characters
@item @specparam{%U, [][]1}
@tab Unique uppercase letter (use only once)
-@tab prefix, suffix, (min) number of characters
+@tab prefix or field, suffix or field, (min) number of characters
@item @specparam{%n, [][]1}
@tab Unique number (use only once)
-@tab prefix, suffix, (min) number of characters
+@tab prefix or field, suffix or field, (min) number of characters
@item @spec{%0}
@tab Escaped digit (0-9)
@item @spec{%%}
@@ -3878,11 +3878,11 @@
can be used to control the separator between author names and an optional
@samp{et al.} suffix.
For @spec{%A} and @spec{%P}, the first one is the separator between two author
names,
and the second one is the separator between the last name and the first
initial.
-The last character of the @samp{et al.} option can be a digit inidicating the
number of authors
+The last character of the @samp{et al.} option can be a digit indicating the
number of authors
used before the @samp{et al.} suffix, in case that is smaller than the maximum
number of authors.
The optional parameter delimited by square brackets after a title specifier
@spec{%T} denotes
-the maximum length of words to be dropped. If this arguments is missing,
+the maximum length of words to be dropped. If this argument is missing,
words with length up to 3 are not counted, but they are included.
The optional parameter delimited by square brackets after a file extension
specifier @spec{%E} denotes
@@ -3889,8 +3889,11 @@
a default extension used when the file has no extension.
The optional parameters delimited by square brackets after one of the
@samp{unique} specifiers
-@spec{%u}, @spec{%U} and @spec{%n} are a prefix and a suffix added before or
after the added unique characters.
-These are not inserted when no unique characters are added.
+@spec{%u}, @spec{%U} and @spec{%n} depend on the optional number parameter.
+When the number is 0, these are a prefix and a suffix inserted before or after
the added unique characters, only inserted when the unique characters are added.
+When the number is not 0, these can be field names whose hash value may be
used to generate the unique characters.
+This allows a more stable value that does not depend on the context of other
items.
+For instance for Doi or Title this can reproduce the universal cite key from
Papers 2 and 3.
In the local file format the slash-character (/) has a special meaning, as it
is used
to build the (UNIX style) folder hierarchy. This is usually not what you want
generated
@@ -3932,16 +3935,16 @@
@table @asis
@item Format string:
-@specparam{%a, 1}@bold{:}@spec{%Y}@specparam{%u,2}
+@specparam{%a, 1}@bold{:}@spec{%Y}@specparam{%u, [Doi][Title]2}
@item Resulting value:
-McCracken:2004yc
+McCracken:2004ul
@end table
@table @asis
@item Format string:
-@specparam{%a, 1}@bold{:}@spec{%Y}@spec{%u}
+@specparam{%a, 1}@bold{:}@spec{%Y}@specparam{%n, 0}
@item Resulting value:
-McCracken:2004b
+McCracken:2004a
@end table
@table @asis
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
_______________________________________________
Bibdesk-commit mailing list
Bibdesk-commit@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit