https://gcc.gnu.org/g:67e0490922691305699fb17922e44bce304e0505
commit r16-3173-g67e0490922691305699fb17922e44bce304e0505 Author: Robert Dubner <rdub...@symas.com> Date: Tue Aug 12 22:13:59 2025 -0400 cobol: Implement faster zoned decimal to binary conversion. Replace " value *= 10; value += digit" routines with a new one that does two digits at a time and avoids __int128 calculations until they are necessary. These changes also clean up the conversion behavior when a digit is not valid. gcc/cobol/ChangeLog: * genutil.cc (get_binary_value): Use the new routine. libgcobol/ChangeLog: * libgcobol.cc (int128_to_field): Use the new routine. (get_binary_value_local): Use the new routine. (format_for_display_internal): Formatting. (__gg__get_file_descriptor): Likewise. * stringbin.cc (string_from_combined): Formatting. (packed_from_combined): Likewise. (int_from_string): New routine. (__gg__numeric_display_to_binary): Likewise. * stringbin.h (__gg__numeric_display_to_binary): Likewise. Diff: --- gcc/cobol/genutil.cc | 256 +++++++---------------------------- libgcobol/libgcobol.cc | 149 +++++--------------- libgcobol/stringbin.cc | 358 +++++++++++++++++++++++++++++++++++++++++++++++-- libgcobol/stringbin.h | 5 + 4 files changed, 436 insertions(+), 332 deletions(-) diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc index 1c39ff19f338..e131d15d5361 100644 --- a/gcc/cobol/genutil.cc +++ b/gcc/cobol/genutil.cc @@ -752,8 +752,12 @@ get_binary_value( tree value, return; } - static tree pointer = gg_define_variable(UCHAR_P, "..gbv_pointer", vs_file_static); - static tree pend = gg_define_variable(UCHAR_P, "..gbv_pend", vs_file_static); + static tree pointer = gg_define_variable( UCHAR_P, + "..gbv_pointer", + vs_file_static); + static tree pend = gg_define_variable(UCHAR_P, + "..gbv_pend", + vs_file_static); switch(field->type) { @@ -791,8 +795,9 @@ get_binary_value( tree value, // We need to check early on for HIGH-VALUE and LOW-VALUE // Pick up the byte tree digit = gg_get_indirect_reference(source_address, NULL_TREE); - IF( digit, eq_op, build_int_cst(UCHAR, 0xFF) ) + IF( digit, eq_op, build_int_cst(UCHAR, DEGENERATE_HIGH_VALUE) ) { + // We are dealing with HIGH-VALUE if( hilo ) { gg_assign(hilo, integer_one_node); @@ -803,12 +808,14 @@ get_binary_value( tree value, build_int_cst_type( TREE_TYPE(rdigits), get_scaled_rdigits(field))); } - gg_assign(value, build_int_cst_type(TREE_TYPE(value), 0xFFFFFFFFFFFFFFFUL)); + gg_assign(value, build_int_cst_type(TREE_TYPE(value), + 0x7FFFFFFFFFFFFFFFUL)); } ELSE { - IF( digit, eq_op, build_int_cst(UCHAR, 0x00) ) + IF( digit, eq_op, build_int_cst(UCHAR, DEGENERATE_LOW_VALUE) ) { + // We are dealing with LOW-VALUE if( hilo ) { gg_assign(hilo, integer_minus_one_node); @@ -816,26 +823,25 @@ get_binary_value( tree value, } ELSE { - // Establish rdigits: + // We are dealing with an ordinary NumericEdited value + gg_assign(pointer, source_address); + if( rdigits ) { gg_assign(rdigits, - build_int_cst_type( TREE_TYPE(rdigits), - get_scaled_rdigits(field))); + build_int_cst_type(TREE_TYPE(rdigits), + get_scaled_rdigits(field))); } - // Zero out the destination - gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); - // Pick up a pointer to the source bytes: - - gg_assign(pointer, source_address); - - // This is the we-are-done pointer - gg_assign(pend, gg_add( pointer, - get_any_capacity(field))); - - static tree signbyte = gg_define_variable(UCHAR, "..gbv_signbyte", vs_file_static); - - // The big decision is whether or not the variable is signed: + // This will be the 128-bit value of the character sequence + static tree val128 = gg_define_variable(INT128, + "..gbv_val128", + vs_file_static); + // This is a pointer to the sign byte + static tree signp = gg_define_variable(UCHAR_P, + "..gbv_signp", + vs_file_static); + // We need to figure out where the sign information, if any is to be + // found: if( field->attr & signable_e ) { // The variable is signed @@ -845,12 +851,17 @@ get_binary_value( tree value, if( field->attr & leading_e) { // The first byte is '+' or '-' + gg_assign(signp, source_address); + // Increment pointer to point to the first actual digit gg_increment(pointer); } else { // The final byte is '+' or '-' - gg_decrement(pend); + gg_assign(signp, + gg_add(source_address, + build_int_cst_type( SIZE_T, + field->data.digits))); } } else @@ -858,199 +869,34 @@ get_binary_value( tree value, // The sign byte is internal if( field->attr & leading_e) { - // The first byte has the sign bit. We need to turn it off, - // to make the value positive: - gg_assign(signbyte, - gg_get_indirect_reference(source_address, NULL_TREE)); - // We need to make sure the ascii sign bit is off, for positive - gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), - gg_bitwise_and( signbyte, - build_int_cst_type( UCHAR, - ~NUMERIC_DISPLAY_SIGN_BIT))); + // The first byte has the sign bit. + gg_assign(signp, source_address); } else { - // The final byte has the sign bit. We need to turn it off, - // to make the value positive: - gg_assign(signbyte, - gg_get_indirect_reference(source_address, - build_int_cst_type(SIZE_T, - field->data.capacity-1))); - gg_assign(gg_get_indirect_reference(source_address, - build_int_cst_type( SIZE_T, - field->data.capacity-1)), - gg_bitwise_and( signbyte, - build_int_cst_type( UCHAR, - ~NUMERIC_DISPLAY_SIGN_BIT))); - } - } - } - // We can now set up the byte-by-byte processing loop: - WHILE( pointer, lt_op, pend ) - { - // Pick up the byte - digit = gg_get_indirect_reference(pointer, NULL_TREE); - // Whether ASCII or EBCDIC, the bottom four bits tell the tale: - // Multiply our accumulator by ten: - gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); - // And add in the current digit - gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and(digit, build_int_cst_type(UCHAR, 0x0F))))); - gg_increment(pointer); - } - WEND -#if 0 - if( internal_codeset_is_ebcdic() ) - { - // We are working in EBCDIC - WHILE( pointer, lt_op, pend ) - { - // Pick up the byte - digit = gg_get_indirect_reference(pointer, NULL_TREE); - IF( digit, lt_op, build_int_cst_type(UCHAR, EBCDIC_ZERO) ) - { - // break on a non-digit - gg_assign(pointer, pend); - } - ELSE - { - IF( digit, gt_op, build_int_cst_type(UCHAR, EBCDIC_NINE) ) - { - // break on a non-digit - gg_assign(pointer, pend); - } - ELSE - { - // Whether ASCII or EBCDIC, the bottom four bits tell the tale: - // Multiply our accumulator by ten: - gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); - // And add in the current digit - gg_assign(value, - gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and( digit, - build_int_cst_type(UCHAR, 0x0F) )))); - gg_increment(pointer); - } - ENDIF + // The final byte has the sign bit. + gg_assign(signp, + gg_add(source_address, + build_int_cst_type( SIZE_T, + field->data.digits-1))); } - ENDIF } - WEND } else { - // We are working in ASCII: - WHILE( pointer, lt_op, pend ) - { - // Pick up the byte - digit = gg_get_indirect_reference(pointer, NULL_TREE); - // Whether ASCII or EBCDIC, the bottom four bits tell the tale: - // Multiply our accumulator by ten: - gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); - // And add in the current digit - gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and(digit, build_int_cst_type(UCHAR, 0x0F))))); - gg_increment(pointer); - } - WEND + // This value is unsigned, so just use the first location: + gg_assign(signp, source_address); } -#endif - // Value contains the binary value. The last thing is to apply -- and - // undo -- the signable logic: - - if( field->attr & signable_e ) - { - // The variable is signed - if( field->attr & separate_e ) - { - // The sign byte is separate - if( field->attr & leading_e) - { - // The first byte is '+' or '-' - if( internal_codeset_is_ebcdic() ) - { - // We are operating in EBCDIC, so we look for a 96 (is minus sign) - IF( gg_get_indirect_reference(source_address, NULL_TREE), - eq_op, - build_int_cst_type(UCHAR, 96) ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - else - { - // We are operating in ASCII - IF( gg_get_indirect_reference(source_address, NULL_TREE), - eq_op, - build_int_cst_type(UCHAR, '-') ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - } - else - { - // The final byte is '+' or '-' - if( internal_codeset_is_ebcdic() ) - { - // We are operating in EBCDIC - IF( gg_get_indirect_reference(source_address, - build_int_cst_type(SIZE_T, - field->data.capacity-1)), - eq_op, - build_int_cst_type(UCHAR, EBCDIC_MINUS) ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - else - { - // We are operating in ASCII - IF( gg_get_indirect_reference(source_address, build_int_cst_type(SIZE_T, field->data.capacity-1)), - eq_op, - build_int_cst_type(UCHAR, '-') ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - } - } - else - { - // The sign byte is internal. Check the sign bit - IF( gg_bitwise_and(signbyte, - build_int_cst_type(UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - ne_op, - build_int_cst_type(UCHAR, 0) ) - { - // The ASCII sign bit was on, so negate the result - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - // It's time to put back the original data: - if( field->attr & leading_e) - { - // The first byte has the sign bit: - gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), - signbyte); - } - else - { - // The final byte has the sign bit: - gg_assign(gg_get_indirect_reference(source_address, - build_int_cst_type(SIZE_T, field->data.capacity-1)), - signbyte); - } - } - } + gg_assign(val128, + gg_call_expr( INT128, + "__gg__numeric_display_to_binary", + signp, + pointer, + build_int_cst_type(INT, field->data.digits), + NULL_TREE)); + // Assign the value we got from the string to our "return" value: + gg_assign(value, gg_cast(TREE_TYPE(value), val128)); } ENDIF } diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc index b46fd13f2080..f41434f9f3a0 100644 --- a/libgcobol/libgcobol.cc +++ b/libgcobol/libgcobol.cc @@ -1565,8 +1565,8 @@ int128_to_field(cblc_field_t *var, memset(location, 0, length); size_error = __gg__binary_to_string_internal( PTRCAST(char, location), - length > MAX_FIXED_POINT_DIGITS - ? MAX_FIXED_POINT_DIGITS + length > MAX_FIXED_POINT_DIGITS + ? MAX_FIXED_POINT_DIGITS : length, value); break; @@ -1633,7 +1633,7 @@ int128_to_field(cblc_field_t *var, } } - unsigned char *sign_location = + unsigned char *sign_location = var->attr & leading_e ? location : location + length - 1; if( internal_is_ebcdic ) @@ -1651,7 +1651,7 @@ int128_to_field(cblc_field_t *var, else { // It's a simple positive number - size_error = __gg__binary_to_string_internal( + size_error = __gg__binary_to_string_internal( PTRCAST(char, location), length, value); @@ -1731,12 +1731,12 @@ int128_to_field(cblc_field_t *var, // Assume for the moment that the res unsigned char sign_nybble = 0; - if( var->attr & packed_no_sign_e ) + if( var->attr & packed_no_sign_e ) { // This is COMP-6 packed decimal, with no sign nybble sign_nybble = 0; } - else + else { // This is COMP-3 packed decimal, so we need to make room to the // right of the final decimal digit for the sign nybble: @@ -1770,7 +1770,7 @@ int128_to_field(cblc_field_t *var, /* We need to check if the value is too big, in case our caller wants to check for the error condition. In any event, we need - to make sure the value actually fits, because otherwise the + to make sure the value actually fits, because otherwise the result might have a bad high-place digit for a value with an odd number of places. */ @@ -1780,7 +1780,7 @@ int128_to_field(cblc_field_t *var, // We are now set up to do the conversion: __gg__binary_to_packed(location, digits, value); - + // We can put the sign nybble into place at this point. Note that // for COMP-6 numbers the sign_nybble value is zero, so the next // operation is harmless. @@ -2014,6 +2014,7 @@ get_binary_value_local( int *rdigits, case FldNumericDisplay: { + *rdigits = resolved_var->rdigits; if( resolved_location[resolved_length-1] == DEGENERATE_HIGH_VALUE ) { // This is a degenerate case, which violates the language @@ -2037,12 +2038,12 @@ get_binary_value_local( int *rdigits, // Make it positive by turning off the highest order bit: (PTRCAST(unsigned char, &retval))[sizeof(retval)-1] = 0x3F; - *rdigits = resolved_var->rdigits; } else { + unsigned char *digits; unsigned char *sign_byte_location; - unsigned char ch; + int ndigits; if( resolved_var->attr & signable_e ) { // Pick up the sign byte, and force our value to be positive @@ -2050,130 +2051,42 @@ get_binary_value_local( int *rdigits, && (resolved_var->attr & leading_e ) ) { // LEADING SEPARATE + digits = resolved_location+1; sign_byte_location = resolved_location; - resolved_location += 1; - resolved_length -= 1; - ch = *sign_byte_location; - *sign_byte_location = internal_plus; + ndigits = resolved_length - 1; } else if( (resolved_var->attr & separate_e) && !(resolved_var->attr & leading_e ) ) { // TRAILING SEPARATE + digits = resolved_location; sign_byte_location = resolved_location + resolved_length - 1; - resolved_length -= 1; - ch = *sign_byte_location; - *sign_byte_location = internal_plus; + ndigits = resolved_length - 1; } else if( (resolved_var->attr & leading_e) ) { // LEADING + digits = resolved_location; sign_byte_location = resolved_location; - ch = *sign_byte_location; - turn_sign_bit_off(sign_byte_location); + ndigits = resolved_length; } else // if( !(resolved_var->attr & leading_e) ) { // TRAILING + digits = resolved_location; sign_byte_location = resolved_location + resolved_length - 1; - ch = *sign_byte_location; - turn_sign_bit_off(sign_byte_location); + ndigits = resolved_length; } } - - // We know where the decimal point is because of rdigits. Because - // we know that we have a clean string of digits (either ASCII or - // EBCDIC), we can just build up the result: - - static const uint8_t from_ebcdic[256] = - { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x30 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0 - 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xc0 - 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xd0 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0 - 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xf0 - }; - - static const uint8_t from_ascii[256] = - { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 - 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x30 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 - 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x70 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xc0 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xd0 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xf0 - }; - - if( internal_is_ebcdic ) - { - for(size_t i=0; i<resolved_length; i++) - { - retval *= 10; - retval += from_ebcdic[resolved_location[i]]; - } - } - else + else { - for(size_t i=0; i<resolved_length; i++) - { - retval *= 10; - retval += from_ascii[resolved_location[i]]; - } - } - - *rdigits = resolved_var->rdigits; - - if( resolved_var->attr & signable_e ) - { - // Restore the sign byte - *sign_byte_location = ch; - - // And if the source is flagged negative, make our result negative - if( ch == internal_minus ) - { - retval = -retval; - } - else - { - if( internal_is_ebcdic ) - { - // EBCDIC characters: - if( (ch & 0xF0) == 0xD0 ) - { - retval = -retval; - } - } - else - { - // ASCII characters: - if( (ch & 0xF0) == 0x70 ) - { - retval = -retval; - } - } - } + digits = resolved_location; + sign_byte_location = resolved_location; + ndigits = resolved_length; } + retval = __gg__numeric_display_to_binary(sign_byte_location, + digits, + ndigits); } break; } @@ -3120,7 +3033,7 @@ format_for_display_internal(char **dest, { // Because a NumericDisplay can have any damned thing as a character, // we are going force things that aren't digits to display as '0' - static const uint8_t ascii_chars[256] = + static const uint8_t ascii_chars[256] = { '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x00 '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x10 @@ -3139,7 +3052,7 @@ format_for_display_internal(char **dest, '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xe0 '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xf0 }; - static const uint8_t ebcdic_chars[256] = + static const uint8_t ebcdic_chars[256] = { 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x00 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x10 @@ -3239,7 +3152,7 @@ format_for_display_internal(char **dest, // Welcome to COBOL. We might be dealing with a HIGH-VALUE, which // is usually, but not always 0xFF. I am going to handle the 0xFF - // case. When the programmer messes with HIGH-VALUE in the + // case. When the programmer messes with HIGH-VALUE in the // SPECIAL-NAMES ALPHABET clause, then it becomes their problem. // But when it isn't HIGH-VALUE, we don't want to see the effects @@ -13469,7 +13382,7 @@ int __gg__get_file_descriptor(const char *device) { int retval = open(device, O_WRONLY); - + if( retval == -1 ) { char *msg; @@ -13485,7 +13398,7 @@ __gg__get_file_descriptor(const char *device) open_syslog(option, facility); syslog(priority, "%s", msg); } - + // Open a new handle to /dev/stdout, since our caller will be closing it retval = open("/dev/stdout", O_WRONLY); } diff --git a/libgcobol/stringbin.cc b/libgcobol/stringbin.cc index 34ddd8481179..2e88cdf1fb5e 100644 --- a/libgcobol/stringbin.cc +++ b/libgcobol/stringbin.cc @@ -83,7 +83,7 @@ That turns out to be unnecessarily slow. - The routine implemented here uses a divide-and-conquer approach to + The routine implemented here uses a divide-and-conquer approach to minimimizing the number of operations, and when you get down to two digits it does a divide-by-100 and uses the remainder in a table lookup to get the digits. */ @@ -92,10 +92,10 @@ Whether that paranoia is justified (perhaps "digit%10 + '0';" ) would actually be faster) is currently untested. But I figured this would be pretty darn fast. - + Use them when you know the index is between zero and one hundred. */ -static const char digit_low[100] = +static const char digit_low[100] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, @@ -109,9 +109,9 @@ static const char digit_low[100] = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, }; -static const char digit_high[100] = +static const char digit_high[100] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -126,7 +126,7 @@ static const char digit_high[100] = static char combined_string[128]; static char zero_char; -typedef struct +typedef struct { int start; int run; @@ -146,7 +146,7 @@ string_from_combined(const COMBINED &combined) { COMBINED left; COMBINED right; - + uint16_t v16; switch(combined.run) @@ -163,7 +163,7 @@ string_from_combined(const COMBINED &combined) break; case 3: - // We know that val16 has three digits. + // We know that val16 has three digits. v16 = combined.val16; combined_string[combined.start] = v16 / 100 + zero_char; v16 %= 100; @@ -357,7 +357,7 @@ packed_from_combined(const COMBINED &combined) COMBINED left; COMBINED right; - + switch(combined.run) { case 1: @@ -475,3 +475,343 @@ __gg__binary_to_packed( unsigned char *result, packed_from_combined(combined); memcpy(result, combined_string, length); } + + +unsigned __int128 +int_from_string(const char *psz, int n) + { + /* This is a generalized converter */ + + + /* We are assuming that 64-bit arithmetic is faster than 128-bit arithmetic, + and so we build up a 128-bit result in three 64-bit pieces, and assemble + them at the end. */ + + static const uint8_t lookup[] = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,0,0,0,0,0, + 10,11,12,13,14,15,16,17,18,19, 0,0,0,0,0,0, + 20,21,22,23,24,25,26,27,28,29, 0,0,0,0,0,0, + 30,31,32,33,34,35,36,37,38,39, 0,0,0,0,0,0, + 40,41,42,43,44,45,46,47,48,49, 0,0,0,0,0,0, + 50,51,52,53,54,55,56,57,58,59, 0,0,0,0,0,0, + 60,61,62,63,64,65,66,67,68,69, 0,0,0,0,0,0, + 70,71,72,73,74,75,76,77,78,79, 0,0,0,0,0,0, + 80,81,82,83,84,85,86,87,88,89, 0,0,0,0,0,0, + 90,91,92,93,94,95,96,97,98,99, 0,0,0,0,0,0, + }; + + unsigned __int128 retval; + + uint64_t top = 0; + uint64_t middle = 0; + uint64_t bottom = 0; + + int count_bottom; + int count_middle; + int count_top; + + // Digits 1 through 18 come from the bottom: + if( n <= 18 ) + { + count_bottom = n; + count_middle = 0; + count_top = 0; + } + else if( n<= 36 ) + { + count_bottom = 18; + count_middle = n - 18; + count_top = 0; + } + else + { + count_bottom = 18; + count_middle = 18; + count_top = n - 36; + } + + if( n & 1 ) + { + // We are dealing with an odd number of digits + if( count_top ) + { + top = *psz++ & 0x0f; + count_top -= 1; + } + else if( count_middle ) + { + middle = *psz++ & 0x0f; + count_middle -= 1; + } + else + { + bottom = *psz++ & 0x0f; + count_bottom -= 1; + } + } + + uint8_t add_me; + + while( count_top ) + { + add_me = *psz++ << 4; + add_me += *psz++ & 0xF; + top *= 100 ; + top += lookup[add_me]; + count_top -= 2; + } + + while( count_middle ) + { + add_me = *psz++ << 4; + add_me += *psz++ & 0xF; + middle *= 100 ; + middle += lookup[add_me]; + count_middle -= 2; + } + + while( count_bottom ) + { + add_me = *psz++ << 4; + add_me += *psz++ & 0xF; + bottom *= 100 ; + bottom += lookup[add_me]; + count_bottom -= 2; + } + + retval = top; + retval *= 1000000000000000000ULL; // 10E18 + + retval += middle; + retval *= 1000000000000000000ULL; + + retval += bottom; + + return retval; + } + +extern "C" +__int128 +__gg__numeric_display_to_binary(unsigned char *signp, + const unsigned char *psz, + int n ) + { + /* This is specific to numeric display values. + + Such values can be unsigned, or they can have leading or trailing + internal sign information, or they can have leading or trailing external + sign information. + + In ASCII, digits are 030; internal sign is has the zone 0x70. + + In EBDIC, normal digits are 0xF0. The sign byte in for a positive + signable number has the zone 0xC0; a negative value has the zone 0xD0. + + A further complication is that it is legal for NumericDisplay values to + have non-digit characters. This is because of REDEFINES, and whatnot. + Some COBOL implementations just look at the bottom four bits of + characters regardless of their legality. I am choosing to have non-legal + characters come back as zero. I do this with tables, so the cost is low. + */ + + /* We are assuming that 64-bit arithmetic is faster than 128-bit arithmetic, + and so we build up a 128-bit result in three 64-bit pieces, and assemble + them at the end. */ + + + static const uint8_t lookup[] = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,0,0,0,0,0, + 10,11,12,13,14,15,16,17,18,19, 0,0,0,0,0,0, + 20,21,22,23,24,25,26,27,28,29, 0,0,0,0,0,0, + 30,31,32,33,34,35,36,37,38,39, 0,0,0,0,0,0, + 40,41,42,43,44,45,46,47,48,49, 0,0,0,0,0,0, + 50,51,52,53,54,55,56,57,58,59, 0,0,0,0,0,0, + 60,61,62,63,64,65,66,67,68,69, 0,0,0,0,0,0, + 70,71,72,73,74,75,76,77,78,79, 0,0,0,0,0,0, + 80,81,82,83,84,85,86,87,88,89, 0,0,0,0,0,0, + 90,91,92,93,94,95,96,97,98,99, 0,0,0,0,0,0, + }; + + static const uint8_t from_ebcdic[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x30 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xc0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xd0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0 + 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xf0 + }; + + static const uint8_t from_ascii[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 + 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x30 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xc0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xd0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xf0 + }; + + __int128 retval; + + uint64_t top = 0; + uint64_t middle = 0; + uint64_t bottom = 0; + + int count_bottom; + int count_middle; + int count_top; + + bool is_negative = false; + + // Pick up the original sign byte: + unsigned char sign_byte = *signp; + + const unsigned char *mapper; + if( internal_is_ebcdic ) + { + mapper = from_ebcdic; + if( sign_byte == EBCDIC_MINUS ) + { + is_negative = true; + } + else if( (sign_byte & 0xF0) == 0xD0 ) + { + is_negative = true; + } + // No matter what the digit, force it to be a valid positive digit by + // forcing the zone to 0xF0. Note that this is harmless if redundant, and + // harmless as well if the data SIGN IS SEPARATE. Whatever we do to this + // byte will be undone at the end of the routine. + *signp |= 0xF0; + } + else + { + mapper = from_ascii; + if( sign_byte == '-' ) + { + is_negative = true; + } + else if( (sign_byte & 0xF0) == 0x70 ) + { + is_negative = true; + + // Make it a valid positive digit by turning the zone to 0x30 + *signp &= 0x3F; + } + } + + // Digits 1 through 18 come from the bottom: + if( n <= 18 ) + { + count_bottom = n; + count_middle = 0; + count_top = 0; + } + else if( n<= 36 ) + { + count_bottom = 18; + count_middle = n - 18; + count_top = 0; + } + else + { + count_bottom = 18; + count_middle = 18; + count_top = n - 36; + } + + if( n & 1 ) + { + // We are dealing with an odd number of digits + if( count_top ) + { + top = mapper[*psz++]; + count_top -= 1; + } + else if( count_middle ) + { + middle = mapper[*psz++]; + count_middle -= 1; + } + else + { + bottom = mapper[*psz++]; + count_bottom -= 1; + } + } + + uint8_t add_me; + + while( count_top ) + { + add_me = mapper[*psz++] << 4; + add_me += mapper[*psz++]; + top *= 100 ; + top += lookup[add_me]; + count_top -= 2; + } + + while( count_middle ) + { + add_me = mapper[*psz++] << 4; + add_me += mapper[*psz++]; + middle *= 100 ; + middle += lookup[add_me]; + count_middle -= 2; + } + + while( count_bottom ) + { + add_me = mapper[*psz++] << 4; + add_me += mapper[*psz++]; + bottom *= 100 ; + bottom += lookup[add_me]; + count_bottom -= 2; + } + + retval = top; + retval *= 1000000000000000000ULL; // 10E18 + + retval += middle; + retval *= 1000000000000000000ULL; + + retval += bottom; + + if( is_negative ) + { + retval = -retval; + } + + // Replace the original sign byte: + *signp = sign_byte; // cppcheck-suppress redundantAssignment + + return retval; + } + + + + + diff --git a/libgcobol/stringbin.h b/libgcobol/stringbin.h index 5ddb441dbff8..db1ec9dde6a2 100644 --- a/libgcobol/stringbin.h +++ b/libgcobol/stringbin.h @@ -44,4 +44,9 @@ void __gg__binary_to_packed( unsigned char *result, int digits, __int128 value); +extern "C" +__int128 __gg__numeric_display_to_binary( unsigned char *sign_byte, + const unsigned char *digits, + int ndigits ); + #endif