[gcc r16-3173] cobol: Implement faster zoned decimal to binary conversion.

Robert Dubner via Gcc-cvs Tue, 12 Aug 2025 19:28:05 -0700

https://gcc.gnu.org/g:67e0490922691305699fb17922e44bce304e0505


commit r16-3173-g67e0490922691305699fb17922e44bce304e0505
Author: Robert Dubner <rdub...@symas.com>
Date:   Tue Aug 12 22:13:59 2025 -0400

    cobol: Implement faster zoned decimal to binary conversion.
    
    Replace " value *= 10; value += digit" routines with a new one that does two
    digits at a time and avoids __int128 calculations until they are necessary.
    
    These changes also clean up the conversion behavior when a digit is not 
valid.
    
    gcc/cobol/ChangeLog:
    
            * genutil.cc (get_binary_value): Use the new routine.
    
    libgcobol/ChangeLog:
    
            * libgcobol.cc (int128_to_field): Use the new routine.
            (get_binary_value_local): Use the new routine.
            (format_for_display_internal): Formatting.
            (__gg__get_file_descriptor): Likewise.
            * stringbin.cc (string_from_combined): Formatting.
            (packed_from_combined): Likewise.
            (int_from_string): New routine.
            (__gg__numeric_display_to_binary): Likewise.
            * stringbin.h (__gg__numeric_display_to_binary): Likewise.

Diff:
---
 gcc/cobol/genutil.cc   | 256 +++++++----------------------------
 libgcobol/libgcobol.cc | 149 +++++---------------
 libgcobol/stringbin.cc | 358 +++++++++++++++++++++++++++++++++++++++++++++++--
 libgcobol/stringbin.h  |   5 +
 4 files changed, 436 insertions(+), 332 deletions(-)

diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc
index 1c39ff19f338..e131d15d5361 100644
--- a/gcc/cobol/genutil.cc
+++ b/gcc/cobol/genutil.cc
@@ -752,8 +752,12 @@ get_binary_value( tree value,
     return;
     }
 
-  static tree pointer = gg_define_variable(UCHAR_P, "..gbv_pointer", 
vs_file_static);
-  static tree pend = gg_define_variable(UCHAR_P, "..gbv_pend", vs_file_static);
+  static tree pointer = gg_define_variable( UCHAR_P,
+                                            "..gbv_pointer",
+                                            vs_file_static);
+  static tree pend = gg_define_variable(UCHAR_P,
+                                        "..gbv_pend",
+                                        vs_file_static);
 
   switch(field->type)
     {
@@ -791,8 +795,9 @@ get_binary_value( tree value,
       // We need to check early on for HIGH-VALUE and LOW-VALUE
       // Pick up the byte
       tree digit = gg_get_indirect_reference(source_address, NULL_TREE);
-      IF( digit, eq_op, build_int_cst(UCHAR, 0xFF) )
+      IF( digit, eq_op, build_int_cst(UCHAR, DEGENERATE_HIGH_VALUE) )
         {
+        // We are dealing with HIGH-VALUE
         if( hilo )
           {
           gg_assign(hilo, integer_one_node);
@@ -803,12 +808,14 @@ get_binary_value( tree value,
                     build_int_cst_type( TREE_TYPE(rdigits),
                                         get_scaled_rdigits(field)));
           }
-        gg_assign(value, build_int_cst_type(TREE_TYPE(value), 
0xFFFFFFFFFFFFFFFUL));
+        gg_assign(value, build_int_cst_type(TREE_TYPE(value),
+                                            0x7FFFFFFFFFFFFFFFUL));
         }
       ELSE
         {
-        IF( digit, eq_op, build_int_cst(UCHAR, 0x00) )
+        IF( digit, eq_op, build_int_cst(UCHAR, DEGENERATE_LOW_VALUE) )
           {
+          // We are dealing with LOW-VALUE 
           if( hilo )
             {
             gg_assign(hilo, integer_minus_one_node);
@@ -816,26 +823,25 @@ get_binary_value( tree value,
           }
         ELSE
           {
-          // Establish rdigits:
+          // We are dealing with an ordinary NumericEdited value
+          gg_assign(pointer, source_address);
+
           if( rdigits )
             {
             gg_assign(rdigits,
-                    build_int_cst_type( TREE_TYPE(rdigits),
-                                        get_scaled_rdigits(field)));
+                      build_int_cst_type(TREE_TYPE(rdigits),
+                                         get_scaled_rdigits(field)));
             }
-          // Zero out the destination
-          gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node));
-          // Pick up a pointer to the source bytes:
-
-          gg_assign(pointer, source_address);
-
-          // This is the we-are-done pointer
-          gg_assign(pend, gg_add( pointer,
-                                  get_any_capacity(field)));
-
-          static tree signbyte = gg_define_variable(UCHAR, "..gbv_signbyte", 
vs_file_static);
-
-          // The big decision is whether or not the variable is signed:
+          // This will be the 128-bit value of the character sequence
+          static tree val128 = gg_define_variable(INT128,
+                                                  "..gbv_val128",
+                                                  vs_file_static);
+          // This is a pointer to the sign byte
+          static tree signp = gg_define_variable(UCHAR_P,
+                                                  "..gbv_signp",
+                                                  vs_file_static);
+          // We need to figure out where the sign information, if any is to be
+          // found:
           if( field->attr & signable_e )
             {
             // The variable is signed
@@ -845,12 +851,17 @@ get_binary_value( tree value,
               if( field->attr & leading_e)
                 {
                 // The first byte is '+' or '-'
+                gg_assign(signp, source_address);
+                // Increment pointer to point to the first actual digit
                 gg_increment(pointer);
                 }
               else
                 {
                 // The final byte is '+' or '-'
-                gg_decrement(pend);
+                gg_assign(signp,
+                          gg_add(source_address,
+                                build_int_cst_type( SIZE_T,
+                                                    field->data.digits)));
                 }
               }
             else
@@ -858,199 +869,34 @@ get_binary_value( tree value,
               // The sign byte is internal
               if( field->attr & leading_e)
                 {
-                // The first byte has the sign bit.  We need to turn it off,
-                // to make the value positive:
-                gg_assign(signbyte,
-                          gg_get_indirect_reference(source_address, 
NULL_TREE));
-                // We need to make sure the ascii sign bit is off, for positive
-                gg_assign(gg_get_indirect_reference(source_address, NULL_TREE),
-                          gg_bitwise_and( signbyte,
-                                          build_int_cst_type( UCHAR,
-                                                              
~NUMERIC_DISPLAY_SIGN_BIT)));
+                // The first byte has the sign bit.
+                gg_assign(signp, source_address);
                 }
               else
                 {
-                // The final byte has the sign bit.  We need to turn it off,
-                // to make the value positive:
-                gg_assign(signbyte,
-                          gg_get_indirect_reference(source_address,
-                                                    build_int_cst_type(SIZE_T,
-                                                    field->data.capacity-1)));
-                gg_assign(gg_get_indirect_reference(source_address,
-                                                    build_int_cst_type( SIZE_T,
-                                                                        
field->data.capacity-1)),
-                          gg_bitwise_and( signbyte,
-                                          build_int_cst_type( UCHAR,
-                                                              
~NUMERIC_DISPLAY_SIGN_BIT)));
-                }
-              }
-            }
-          // We can now set up the byte-by-byte processing loop:
-          WHILE( pointer, lt_op, pend )
-            {
-            // Pick up the byte
-            digit = gg_get_indirect_reference(pointer, NULL_TREE);
-            // Whether ASCII or EBCDIC, the bottom four bits tell the tale:
-            // Multiply our accumulator by ten:
-            gg_assign(value, gg_multiply(value, 
build_int_cst_type(TREE_TYPE(value), 10)));
-            // And add in the current digit
-            gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), 
gg_bitwise_and(digit, build_int_cst_type(UCHAR, 0x0F)))));
-            gg_increment(pointer);
-            }
-            WEND
-#if 0
-          if( internal_codeset_is_ebcdic() )
-            {
-            // We are working in EBCDIC
-            WHILE( pointer, lt_op, pend )
-              {
-              // Pick up the byte
-              digit = gg_get_indirect_reference(pointer, NULL_TREE);
-              IF( digit, lt_op, build_int_cst_type(UCHAR, EBCDIC_ZERO) )
-                {
-                // break on a non-digit
-                gg_assign(pointer, pend);
-                }
-              ELSE
-                {
-                IF( digit, gt_op, build_int_cst_type(UCHAR, EBCDIC_NINE) )
-                  {
-                  // break on a non-digit
-                  gg_assign(pointer, pend);
-                  }
-                ELSE
-                  {
-                  // Whether ASCII or EBCDIC, the bottom four bits tell the 
tale:
-                  // Multiply our accumulator by ten:
-                  gg_assign(value, gg_multiply(value, 
build_int_cst_type(TREE_TYPE(value), 10)));
-                  // And add in the current digit
-                  gg_assign(value,
-                            gg_add(value, gg_cast(TREE_TYPE(value), 
gg_bitwise_and( digit,
-                                                                               
     build_int_cst_type(UCHAR, 0x0F) ))));
-                  gg_increment(pointer);
-                  }
-                  ENDIF
+                // The final byte has the sign bit.
+                gg_assign(signp,
+                          gg_add(source_address,
+                                build_int_cst_type( SIZE_T,
+                                                    field->data.digits-1)));
                 }
-                ENDIF
               }
-              WEND
             }
           else
             {
-            // We are working in ASCII:
-            WHILE( pointer, lt_op, pend )
-              {
-              // Pick up the byte
-              digit = gg_get_indirect_reference(pointer, NULL_TREE);
-              // Whether ASCII or EBCDIC, the bottom four bits tell the tale:
-              // Multiply our accumulator by ten:
-              gg_assign(value, gg_multiply(value, 
build_int_cst_type(TREE_TYPE(value), 10)));
-              // And add in the current digit
-              gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), 
gg_bitwise_and(digit, build_int_cst_type(UCHAR, 0x0F)))));
-              gg_increment(pointer);
-              }
-              WEND
+            // This value is unsigned, so just use the first location:
+            gg_assign(signp, source_address);
             }
-#endif
 
-          // Value contains the binary value.  The last thing is to apply -- 
and
-          // undo -- the signable logic:
-
-          if( field->attr & signable_e )
-            {
-            // The variable is signed
-            if( field->attr & separate_e )
-              {
-              // The sign byte is separate
-              if( field->attr & leading_e)
-                {
-                // The first byte is '+' or '-'
-                if( internal_codeset_is_ebcdic() )
-                  {
-                  // We are operating in EBCDIC, so we look for a 96 (is minus 
sign)
-                  IF( gg_get_indirect_reference(source_address, NULL_TREE),
-                                                eq_op,
-                                                build_int_cst_type(UCHAR, 96) )
-                    {
-                    gg_assign(value, gg_negate(value));
-                    }
-                  ELSE
-                    ENDIF
-                  }
-                else
-                  {
-                  // We are operating in ASCII
-                  IF( gg_get_indirect_reference(source_address, NULL_TREE),
-                                                eq_op,
-                                                build_int_cst_type(UCHAR, '-') 
)
-                    {
-                    gg_assign(value, gg_negate(value));
-                    }
-                  ELSE
-                    ENDIF
-                  }
-                }
-              else
-                {
-                // The final byte is '+' or '-'
-                if( internal_codeset_is_ebcdic() )
-                  {
-                  // We are operating in EBCDIC
-                  IF( gg_get_indirect_reference(source_address,
-                                                build_int_cst_type(SIZE_T,
-                                                      field->data.capacity-1)),
-                      eq_op,
-                      build_int_cst_type(UCHAR, EBCDIC_MINUS) )
-                    {
-                    gg_assign(value, gg_negate(value));
-                    }
-                  ELSE
-                    ENDIF
-                  }
-                else
-                  {
-                  // We are operating in ASCII
-                  IF( gg_get_indirect_reference(source_address, 
build_int_cst_type(SIZE_T, field->data.capacity-1)),
-                                                eq_op,
-                                                build_int_cst_type(UCHAR, '-') 
)
-                    {
-                    gg_assign(value, gg_negate(value));
-                    }
-                  ELSE
-                    ENDIF
-                  }
-                }
-              }
-            else
-              {
-              // The sign byte is internal.  Check the sign bit
-              IF( gg_bitwise_and(signbyte,
-                                 build_int_cst_type(UCHAR,
-                                                    NUMERIC_DISPLAY_SIGN_BIT)),
-                  ne_op, 
-                  build_int_cst_type(UCHAR, 0) )
-                {
-                // The ASCII sign bit was on, so negate the result
-                gg_assign(value, gg_negate(value));
-                }
-              ELSE
-                ENDIF
-              // It's time to put back the original data:
-              if( field->attr & leading_e)
-                {
-                // The first byte has the sign bit:
-                gg_assign(gg_get_indirect_reference(source_address, NULL_TREE),
-                          signbyte);
-                }
-              else
-                {
-                // The final byte has the sign bit:
-                gg_assign(gg_get_indirect_reference(source_address,
-                                                    build_int_cst_type(SIZE_T, 
field->data.capacity-1)),
-                          signbyte);
-                }
-              }
-            }
+          gg_assign(val128,
+                    gg_call_expr( INT128,
+                                  "__gg__numeric_display_to_binary",
+                                  signp,
+                                  pointer,
+                                  build_int_cst_type(INT, field->data.digits),
+                                  NULL_TREE));
+          // Assign the value we got from the string to our "return" value:
+          gg_assign(value, gg_cast(TREE_TYPE(value), val128));
           }
         ENDIF
         }
diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc
index b46fd13f2080..f41434f9f3a0 100644
--- a/libgcobol/libgcobol.cc
+++ b/libgcobol/libgcobol.cc
@@ -1565,8 +1565,8 @@ int128_to_field(cblc_field_t   *var,
             memset(location, 0, length);
             size_error = __gg__binary_to_string_internal(
                                            PTRCAST(char, location),
-                                           length > MAX_FIXED_POINT_DIGITS 
-                                                    ? MAX_FIXED_POINT_DIGITS 
+                                           length > MAX_FIXED_POINT_DIGITS
+                                                    ? MAX_FIXED_POINT_DIGITS
                                                     : length,
                                            value);
             break;
@@ -1633,7 +1633,7 @@ int128_to_field(cblc_field_t   *var,
                     }
                   }
 
-                unsigned char *sign_location = 
+                unsigned char *sign_location =
                   var->attr & leading_e ? location : location + length - 1;
 
                 if( internal_is_ebcdic )
@@ -1651,7 +1651,7 @@ int128_to_field(cblc_field_t   *var,
             else
               {
               // It's a simple positive number
-              size_error = __gg__binary_to_string_internal( 
+              size_error = __gg__binary_to_string_internal(
                                                     PTRCAST(char, location),
                                                     length,
                                                     value);
@@ -1731,12 +1731,12 @@ int128_to_field(cblc_field_t   *var,
 
             // Assume for the moment that the res
             unsigned char sign_nybble = 0;
-            if( var->attr & packed_no_sign_e ) 
+            if( var->attr & packed_no_sign_e )
               {
               // This is COMP-6 packed decimal, with no sign nybble
               sign_nybble = 0;
               }
-            else 
+            else
               {
               // This is COMP-3 packed decimal, so we need to make room to the
               // right of the final decimal digit for the sign nybble:
@@ -1770,7 +1770,7 @@ int128_to_field(cblc_field_t   *var,
 
             /*  We need to check if the value is too big, in case our caller
                 wants to check for the error condition.  In any event, we need
-                to make sure the value actually fits, because otherwise the 
+                to make sure the value actually fits, because otherwise the
                 result might have a bad high-place digit for a value with an
                 odd number of places. */
 
@@ -1780,7 +1780,7 @@ int128_to_field(cblc_field_t   *var,
 
             // We are now set up to do the conversion:
             __gg__binary_to_packed(location, digits, value);
-            
+
             // We can put the sign nybble into place at this point.  Note that
             // for COMP-6 numbers the sign_nybble value is zero, so the next
             // operation is harmless.
@@ -2014,6 +2014,7 @@ get_binary_value_local(  int                 *rdigits,
 
     case FldNumericDisplay:
       {
+      *rdigits = resolved_var->rdigits;
       if( resolved_location[resolved_length-1] == DEGENERATE_HIGH_VALUE )
         {
         // This is a degenerate case, which violates the language
@@ -2037,12 +2038,12 @@ get_binary_value_local(  int                 *rdigits,
 
         // Make it positive by turning off the highest order bit:
         (PTRCAST(unsigned char, &retval))[sizeof(retval)-1] = 0x3F;
-        *rdigits = resolved_var->rdigits;
         }
       else
         {
+        unsigned char *digits;
         unsigned char *sign_byte_location;
-        unsigned char ch;
+        int ndigits;
         if( resolved_var->attr & signable_e )
           {
           // Pick up the sign byte, and force our value to be positive
@@ -2050,130 +2051,42 @@ get_binary_value_local(  int                 *rdigits,
              && (resolved_var->attr  & leading_e  ) )
             {
             // LEADING SEPARATE
+            digits             = resolved_location+1;
             sign_byte_location = resolved_location;
-            resolved_location += 1;
-            resolved_length -= 1;
-            ch = *sign_byte_location;
-            *sign_byte_location = internal_plus;
+            ndigits = resolved_length - 1;
             }
           else if(    (resolved_var->attr & separate_e)
                   && !(resolved_var->attr & leading_e ) )
             {
             // TRAILING SEPARATE
+            digits             = resolved_location;
             sign_byte_location = resolved_location + resolved_length - 1;
-            resolved_length -= 1;
-            ch = *sign_byte_location;
-            *sign_byte_location = internal_plus;
+            ndigits = resolved_length - 1;
             }
           else if( (resolved_var->attr & leading_e) )
             {
             // LEADING
+            digits             = resolved_location;
             sign_byte_location = resolved_location;
-            ch = *sign_byte_location;
-            turn_sign_bit_off(sign_byte_location);
+            ndigits = resolved_length;
             }
           else // if( !(resolved_var->attr & leading_e) )
             {
             // TRAILING
+            digits             = resolved_location;
             sign_byte_location = resolved_location + resolved_length - 1;
-            ch = *sign_byte_location;
-            turn_sign_bit_off(sign_byte_location);
+            ndigits = resolved_length;
             }
           }
-
-        // We know where the decimal point is because of rdigits.  Because
-        // we know that we have a clean string of digits (either ASCII or
-        // EBCDIC), we can just build up the result:
-
-        static const uint8_t from_ebcdic[256] = 
-          {
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x30
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0
-          0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xc0
-          0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xd0
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0
-          0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xf0
-          };
-
-        static const uint8_t from_ascii[256] = 
-          {
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20
-          0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x30
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60
-          0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x70
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xc0
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xd0
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0
-          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xf0
-          };
-
-        if( internal_is_ebcdic )
-          {
-          for(size_t i=0; i<resolved_length; i++)
-            {
-            retval *= 10;
-            retval += from_ebcdic[resolved_location[i]];
-            }
-          }
-         else
+        else
           {
-          for(size_t i=0; i<resolved_length; i++)
-            {
-            retval *= 10;
-            retval += from_ascii[resolved_location[i]];
-            }
-          }
-
-        *rdigits = resolved_var->rdigits;
-
-        if( resolved_var->attr & signable_e )
-          {
-          // Restore the sign byte
-          *sign_byte_location = ch;
-
-          // And if the source is flagged negative, make our result negative
-          if( ch == internal_minus )
-            {
-            retval = -retval;
-            }
-          else 
-            {
-            if( internal_is_ebcdic )
-              {
-              // EBCDIC characters:
-              if( (ch & 0xF0) == 0xD0 )
-                {
-                retval = -retval;
-                }
-              }
-            else
-              {
-              // ASCII characters:
-              if( (ch & 0xF0) == 0x70 )
-                {
-                retval = -retval;
-                }
-              }
-            }
+          digits             = resolved_location;
+          sign_byte_location = resolved_location;
+          ndigits = resolved_length;
           }
+        retval = __gg__numeric_display_to_binary(sign_byte_location,
+                                                 digits,
+                                                 ndigits);
         }
       break;
       }
@@ -3120,7 +3033,7 @@ format_for_display_internal(char **dest,
       {
       // Because a NumericDisplay can have any damned thing as a character,
       // we are going force things that aren't digits to display as '0'
-      static const uint8_t ascii_chars[256] = 
+      static const uint8_t ascii_chars[256] =
         {
         '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 
'0', '0', // 0x00
         '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 
'0', '0', // 0x10
@@ -3139,7 +3052,7 @@ format_for_display_internal(char **dest,
         '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 
'0', '0', // 0xe0
         '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 
'0', '0', // 0xf0
         };
-      static const uint8_t ebcdic_chars[256] = 
+      static const uint8_t ebcdic_chars[256] =
         {
         
0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,
 // 0x00
         
0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,
 // 0x10
@@ -3239,7 +3152,7 @@ format_for_display_internal(char **dest,
 
           // Welcome to COBOL.  We might be dealing with a HIGH-VALUE, which
           // is usually, but not always 0xFF.  I am going to handle the 0xFF
-          // case.  When the programmer messes with HIGH-VALUE in the 
+          // case.  When the programmer messes with HIGH-VALUE in the
           // SPECIAL-NAMES ALPHABET clause, then it becomes their problem.
 
           // But when it isn't HIGH-VALUE, we don't want to see the effects
@@ -13469,7 +13382,7 @@ int
 __gg__get_file_descriptor(const char *device)
   {
   int retval = open(device, O_WRONLY);
-  
+
   if( retval == -1 )
     {
     char *msg;
@@ -13485,7 +13398,7 @@ __gg__get_file_descriptor(const char *device)
       open_syslog(option, facility);
       syslog(priority, "%s", msg);
       }
-    
+
     // Open a new handle to /dev/stdout, since our caller will be closing it
     retval = open("/dev/stdout", O_WRONLY);
     }
diff --git a/libgcobol/stringbin.cc b/libgcobol/stringbin.cc
index 34ddd8481179..2e88cdf1fb5e 100644
--- a/libgcobol/stringbin.cc
+++ b/libgcobol/stringbin.cc
@@ -83,7 +83,7 @@
 
     That turns out to be unnecessarily slow.
 
-    The routine implemented here uses a divide-and-conquer approach to 
+    The routine implemented here uses a divide-and-conquer approach to
     minimimizing the number of operations, and when you get down to two
     digits it does a divide-by-100 and uses the remainder in a table lookup
     to get the digits. */
@@ -92,10 +92,10 @@
     Whether that paranoia is justified (perhaps "digit%10 + '0';" ) would
     actually be faster) is currently untested.  But I figured this would be
     pretty darn fast.
-    
+
     Use them when you know the index is between zero and one hundred.  */
 
-static const char digit_low[100] = 
+static const char digit_low[100] =
   {
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
@@ -109,9 +109,9 @@ static const char digit_low[100] =
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
   };
 
-static const char digit_high[100] = 
+static const char digit_high[100] =
   {
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
@@ -126,7 +126,7 @@ static const char digit_high[100] =
 static char combined_string[128];
 static char zero_char;
 
-typedef struct 
+typedef struct
   {
   int   start;
   int   run;
@@ -146,7 +146,7 @@ string_from_combined(const COMBINED &combined)
   {
   COMBINED left;
   COMBINED right;
-  
+
   uint16_t v16;
 
   switch(combined.run)
@@ -163,7 +163,7 @@ string_from_combined(const COMBINED &combined)
       break;
 
     case 3:
-      // We know that val16 has three digits.  
+      // We know that val16 has three digits.
       v16 = combined.val16;
       combined_string[combined.start] = v16 / 100 + zero_char;
       v16 %= 100;
@@ -357,7 +357,7 @@ packed_from_combined(const COMBINED &combined)
 
   COMBINED left;
   COMBINED right;
-  
+
   switch(combined.run)
     {
     case 1:
@@ -475,3 +475,343 @@ __gg__binary_to_packed( unsigned char *result,
   packed_from_combined(combined);
   memcpy(result, combined_string, length);
   }
+
+
+unsigned __int128
+int_from_string(const char *psz, int n)
+  {
+  /* This is a generalized converter */
+
+
+  /*  We are assuming that 64-bit arithmetic is faster than 128-bit arithmetic,
+      and so we build up a 128-bit result in three 64-bit pieces, and assemble
+      them at the end.  */
+
+  static const uint8_t lookup[] =
+    {
+     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,0,0,0,0,0,
+    10,11,12,13,14,15,16,17,18,19, 0,0,0,0,0,0,
+    20,21,22,23,24,25,26,27,28,29, 0,0,0,0,0,0,
+    30,31,32,33,34,35,36,37,38,39, 0,0,0,0,0,0,
+    40,41,42,43,44,45,46,47,48,49, 0,0,0,0,0,0,
+    50,51,52,53,54,55,56,57,58,59, 0,0,0,0,0,0,
+    60,61,62,63,64,65,66,67,68,69, 0,0,0,0,0,0,
+    70,71,72,73,74,75,76,77,78,79, 0,0,0,0,0,0,
+    80,81,82,83,84,85,86,87,88,89, 0,0,0,0,0,0,
+    90,91,92,93,94,95,96,97,98,99, 0,0,0,0,0,0,
+    };
+
+  unsigned __int128 retval;
+
+  uint64_t top = 0;
+  uint64_t middle = 0;
+  uint64_t bottom = 0;
+
+  int count_bottom;
+  int count_middle;
+  int count_top;
+
+  // Digits 1 through 18 come from the bottom:
+  if( n <= 18 )
+    {
+    count_bottom = n;
+    count_middle = 0;
+    count_top = 0;
+    }
+  else if( n<= 36 )
+    {
+    count_bottom = 18;
+    count_middle = n - 18;
+    count_top = 0;
+    }
+  else
+    {
+    count_bottom = 18;
+    count_middle = 18;
+    count_top = n - 36;
+    }
+
+  if( n & 1 )
+    {
+    // We are dealing with an odd number of digits
+    if( count_top )
+      {
+      top = *psz++ & 0x0f;
+      count_top -= 1;
+      }
+    else if( count_middle )
+      {
+      middle = *psz++ & 0x0f;
+      count_middle -= 1;
+      }
+    else
+      {
+      bottom = *psz++ & 0x0f;
+      count_bottom -= 1;
+      }
+    }
+
+  uint8_t add_me;
+
+  while( count_top )
+    {
+    add_me  = *psz++ << 4;
+    add_me += *psz++ &  0xF;
+    top *= 100 ;
+    top += lookup[add_me];
+    count_top -= 2;
+    }
+
+  while( count_middle )
+    {
+    add_me  = *psz++ << 4;
+    add_me += *psz++ &  0xF;
+    middle *= 100 ;
+    middle += lookup[add_me];
+    count_middle -= 2;
+    }
+
+  while( count_bottom )
+    {
+    add_me  = *psz++ << 4;
+    add_me += *psz++ &  0xF;
+    bottom *= 100 ;
+    bottom += lookup[add_me];
+    count_bottom -= 2;
+    }
+
+  retval = top;
+  retval *= 1000000000000000000ULL; // 10E18
+
+  retval += middle;
+  retval *= 1000000000000000000ULL;
+
+  retval += bottom;
+
+  return retval;
+  }
+
+extern "C"
+__int128
+__gg__numeric_display_to_binary(unsigned char *signp,
+                          const unsigned char *psz,
+                                int n )
+  {
+  /*  This is specific to numeric display values.
+
+      Such values can be unsigned, or they can have leading or trailing
+      internal sign information, or they can have leading or trailing external
+      sign information.
+
+      In ASCII, digits are 030; internal sign is has the zone 0x70.
+
+      In EBDIC, normal digits are 0xF0.  The sign byte in for a positive
+      signable number has the zone 0xC0; a negative value has the zone 0xD0.
+
+      A further complication is that it is legal for NumericDisplay values to
+      have non-digit characters.  This is because of REDEFINES, and whatnot.
+      Some COBOL implementations just look at the bottom four bits of
+      characters regardless of their legality.  I am choosing to have non-legal
+      characters come back as zero.  I do this with tables, so the cost is low.
+      */
+
+  /*  We are assuming that 64-bit arithmetic is faster than 128-bit arithmetic,
+      and so we build up a 128-bit result in three 64-bit pieces, and assemble
+      them at the end.  */
+
+
+  static const uint8_t lookup[] =
+    {
+     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,0,0,0,0,0,
+    10,11,12,13,14,15,16,17,18,19, 0,0,0,0,0,0,
+    20,21,22,23,24,25,26,27,28,29, 0,0,0,0,0,0,
+    30,31,32,33,34,35,36,37,38,39, 0,0,0,0,0,0,
+    40,41,42,43,44,45,46,47,48,49, 0,0,0,0,0,0,
+    50,51,52,53,54,55,56,57,58,59, 0,0,0,0,0,0,
+    60,61,62,63,64,65,66,67,68,69, 0,0,0,0,0,0,
+    70,71,72,73,74,75,76,77,78,79, 0,0,0,0,0,0,
+    80,81,82,83,84,85,86,87,88,89, 0,0,0,0,0,0,
+    90,91,92,93,94,95,96,97,98,99, 0,0,0,0,0,0,
+    };
+
+  static const uint8_t from_ebcdic[256] =
+    {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x30
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xc0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xd0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0
+    0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xf0
+    };
+
+  static const uint8_t from_ascii[256] =
+    {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20
+    0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x30
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xc0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xd0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xf0
+    };
+
+  __int128 retval;
+
+  uint64_t top = 0;
+  uint64_t middle = 0;
+  uint64_t bottom = 0;
+
+  int count_bottom;
+  int count_middle;
+  int count_top;
+
+  bool is_negative = false;
+
+  // Pick up the original sign byte:
+  unsigned char sign_byte = *signp;
+
+  const unsigned char *mapper;
+  if( internal_is_ebcdic )
+    {
+    mapper = from_ebcdic;
+    if( sign_byte == EBCDIC_MINUS )
+      {
+      is_negative = true;
+      }
+    else if( (sign_byte & 0xF0) == 0xD0 )
+      {
+      is_negative = true;
+      }
+    // No matter what the digit, force it to be a valid positive digit by
+    // forcing the zone to 0xF0.  Note that this is harmless if redundant, and
+    // harmless as well if the data SIGN IS SEPARATE.  Whatever we do to this
+    // byte will be undone at the end of the routine.
+    *signp |= 0xF0;
+    }
+  else
+    {
+    mapper = from_ascii;
+    if( sign_byte == '-' )
+      {
+      is_negative = true;
+      }
+    else if( (sign_byte & 0xF0) == 0x70 )
+      {
+      is_negative = true;
+
+      // Make it a valid positive digit by turning the zone to 0x30
+      *signp &= 0x3F;
+      }
+    }
+
+  // Digits 1 through 18 come from the bottom:
+  if( n <= 18 )
+    {
+    count_bottom = n;
+    count_middle = 0;
+    count_top = 0;
+    }
+  else if( n<= 36 )
+    {
+    count_bottom = 18;
+    count_middle = n - 18;
+    count_top = 0;
+    }
+  else
+    {
+    count_bottom = 18;
+    count_middle = 18;
+    count_top = n - 36;
+    }
+
+  if( n & 1 )
+    {
+    // We are dealing with an odd number of digits
+    if( count_top )
+      {
+      top = mapper[*psz++];
+      count_top -= 1;
+      }
+    else if( count_middle )
+      {
+      middle = mapper[*psz++];
+      count_middle -= 1;
+      }
+    else
+      {
+      bottom = mapper[*psz++];
+      count_bottom -= 1;
+      }
+    }
+
+  uint8_t add_me;
+
+  while( count_top )
+    {
+    add_me  = mapper[*psz++] << 4;
+    add_me += mapper[*psz++];
+    top *= 100 ;
+    top += lookup[add_me];
+    count_top -= 2;
+    }
+
+  while( count_middle )
+    {
+    add_me  = mapper[*psz++] << 4;
+    add_me += mapper[*psz++];
+    middle *= 100 ;
+    middle += lookup[add_me];
+    count_middle -= 2;
+    }
+
+  while( count_bottom )
+    {
+    add_me  = mapper[*psz++] << 4;
+    add_me += mapper[*psz++];
+    bottom *= 100 ;
+    bottom += lookup[add_me];
+    count_bottom -= 2;
+    }
+
+  retval = top;
+  retval *= 1000000000000000000ULL; // 10E18
+
+  retval += middle;
+  retval *= 1000000000000000000ULL;
+
+  retval += bottom;
+
+  if( is_negative )
+    {
+    retval = -retval;
+    }
+
+  // Replace the original sign byte:
+  *signp = sign_byte; // cppcheck-suppress redundantAssignment
+
+  return retval;
+  }
+
+
+
+
+
diff --git a/libgcobol/stringbin.h b/libgcobol/stringbin.h
index 5ddb441dbff8..db1ec9dde6a2 100644
--- a/libgcobol/stringbin.h
+++ b/libgcobol/stringbin.h
@@ -44,4 +44,9 @@ void __gg__binary_to_packed( unsigned char *result,
                              int digits,
                              __int128 value);
 
+extern "C"
+__int128 __gg__numeric_display_to_binary( unsigned char *sign_byte,
+                                    const unsigned char *digits,
+                                          int ndigits );
+
 #endif

[gcc r16-3173] cobol: Implement faster zoned decimal to binary conversion.

Reply via email to