https://gcc.gnu.org/g:4cd4ec9bd3de347fbd112d2762e05a607243253c
commit r16-7659-g4cd4ec9bd3de347fbd112d2762e05a607243253c Author: Robert Dubner <[email protected]> Date: Tue Feb 24 09:56:01 2026 -0500 cobol: Increase PIC X(MAX) from 8192 to 2^31. Up until these changes, temporary intermediate alphanumeric string variables have been allocated on the stack. With the design change to a larger limit, that's no longer practical. Such variables are now placed on the heap, and we now have to take pains to free() that memory when we are done with it. gcc/cobol/ChangeLog: * gcobc: Adjust how -fPIC is applied, and other refinements. * gcobol.1: Documentation. * genapi.cc (parser_statement_end): New function. Deallocates temp char strings from the heap. (initialize_variable_internal): Ignore temp char strings. (compare_binary_binary): Formatting. (parser_end_program): Formatting. (parser_init_list): Formatting. (parser_exit_program): Formatting. (program_end_stuff): Formatting. (parser_exit): Formatting. (parser_perform_conditional): Formatting. (perform_outofline_before_until): Formatting. (parser_file_add): Formatting. (mh_source_is_literalA): Formatting. (psa_new_var_decl): Make cblc_field_t for intermediate_e alphanumerics program-static. (parser_symbol_add): Eliminate unnecessary code when type!=FldConditional; change handling of intermediate_e for FldAlphanumerics. * genapi.h (parser_end_program): New declaration. (parser_exit): Formatting. (parser_exit_program): Formatting. (parser_statement_end): New declaration. * lexio.cc (parse_replace_pairs): Change CDF handling. (cdftext::lex_open): Likewise. (cdftext::process_file): Likewise. * parse.y: Changes to MAXIMUM_ALPHA_LENGTH; refine return value types for various intrinsic functions; some CDF handling. * parse_ante.h (MAXLENGTH_FORMATTED_DATE): Eliminate constant. (MAXLENGTH_FORMATTED_TIME): Likewise. (MAXLENGTH_CALENDAR_DATE): Likewise. (MAXLENGTH_FORMATTED_DATETIME): Likewise. (new_alphanumeric): No longer takes a capacity. (intrinsic_return_field): New declaration. (struct ffi_args_t): Changed debug message. (is_among): New declaration. * parse_util.h (intrinsic_return_field): New function. Works with the modified function_descrs[] table. * scan.l: Modified scanning. * scan_ante.h (class input_file_status_t): Likewise. (verify_ws): Likewise. (is_refmod): Likewise. * symbols.cc (symbols_update): Improved comment about a debug message. (symbol_temporaries): New function for temporaries on the heap that will have to be deallocated. (symbol_temporary_alphanumerics): Likewise. (new_temporary_impl): Eliminate MAXIMUM_ALPHA_LENGTH from template. (new_alphanumeric): Eliminate capacity as a parameter. * symbols.h (cbl_dialect_str): Formatting. (MAXIMUM_ALPHA_LENGTH): Change comment and value. (IBM_MAXIMUM_ALPHA_LENGTH): Put parentheses around "size_t(1)<<31". (symbol_temporaries): New declaration. (symbol_temporary_alphanumerics): New declaration. (struct function_descr_t): New comment on ret_type. (new_alphanumeric): New declaration. * util.cc (class cdf_directives_t): CDF processing. (cobol_set_indicator_column): Likewise. (cdf_push_source_format): Likewise. (cdf_pop_source_format): Likewise. (parent_names): Likewise. (cobol_filename): Likewise. (cobol_lineno): Likewise. (cobol_filename_restore): Likewise. libgcobol/ChangeLog: * intrinsic.cc (string_to_dest): Move call to __gg__adjust_dest_size(). (__gg__char): Likewise. (__gg__current_date): Likewise. (__gg__formatted_current_date): Likewise. (__gg__formatted_date): Likewise. (__gg__formatted_datetime): Likewise. (__gg__formatted_time): Likewise. (change_case): Likewise. (__gg__trim): Likewise; fix memory leak. (__gg__reverse): Move call to __gg__adjust_dest_size(). (__gg__locale_compare): Likewise (__gg__locale_date): Likewise (__gg__locale_time): Likewise (__gg__locale_time_from_seconds): Likewise * libgcobol.cc (format_for_display_internal): Make the results of intermediate FldNumericBin5 look nice to a human. (init_var_both): Move call to __gg__adjust_dest_size(). (__gg__get_argc): Move call to __gg__adjust_dest_size(). (__gg__get_argv): Move call to __gg__adjust_dest_size(). (__gg__get_command_line): Move call to __gg__adjust_dest_size(). (__gg__adjust_dest_size): Properly handle intermediate_e allocations. (__gg__adjust_encoding): Move call to __gg__adjust_dest_size(). (__gg__module_name): Move call to __gg__adjust_dest_size(). (__gg__refer_from_string): Move call to __gg__adjust_dest_size(). (__gg__refer_from_psz): Move call to __gg__adjust_dest_size(). (__gg__convert): Move call to __gg__adjust_dest_size(). * posix/shim/lseek.cc: Changes to extended functions. * posix/shim/open.cc (posix_opent): Likewise. (posix_open): Likewise. * posix/udf/posix-open.cbl: Likewise. * posix/udf/posix-read.cbl: Likewise. * posix/udf/posix-write.cbl: Likewise. * xmlparse.cc (sayso): Change to debug message. * posix/udf/posix-ftruncate.cbl: New file. Diff: --- gcc/cobol/gcobc | 65 +++++++--- gcc/cobol/gcobol.1 | 60 ++++----- gcc/cobol/genapi.cc | 219 ++++++++++++++++++++------------ gcc/cobol/genapi.h | 8 +- gcc/cobol/lexio.cc | 23 +++- gcc/cobol/parse.y | 98 ++++++++------ gcc/cobol/parse_ante.h | 37 +++--- gcc/cobol/parse_util.h | 171 ++++++++++++++++--------- gcc/cobol/scan.l | 123 +++++++++--------- gcc/cobol/scan_ante.h | 93 ++++++++++++-- gcc/cobol/symbols.cc | 59 ++++++++- gcc/cobol/symbols.h | 29 +++-- gcc/cobol/util.cc | 42 +++--- libgcobol/intrinsic.cc | 87 ++++++++----- libgcobol/libgcobol.cc | 102 ++++++++++----- libgcobol/posix/shim/lseek.cc | 12 +- libgcobol/posix/shim/open.cc | 68 +++++----- libgcobol/posix/udf/posix-ftruncate.cbl | 23 ++++ libgcobol/posix/udf/posix-open.cbl | 14 +- libgcobol/posix/udf/posix-read.cbl | 4 +- libgcobol/posix/udf/posix-write.cbl | 4 +- libgcobol/xmlparse.cc | 2 +- 22 files changed, 876 insertions(+), 467 deletions(-) diff --git a/gcc/cobol/gcobc b/gcc/cobol/gcobc index f4c805fdcc8b..049f91836fb4 100755 --- a/gcc/cobol/gcobc +++ b/gcc/cobol/gcobc @@ -35,10 +35,20 @@ ## output set the mode variable. Everything else is appended to the ## opts variable. ## -## - -fPIC is added to the command line if $mode is "-shared". That -## option applies only to "certain machines", per the gcc info -## manual. For this script to be portable across machines, -fPIC -## would have to be set more judiciously. +## - -fPIC is added to the command line unless we're producing an +## executable. That option applies only to "certain machines", per +## the gcc info manual. For this script to be portable across +## machines, -fPIC would have to be set more judiciously. + +# To override the default gcobol, set the "gcobol" environment variable. +gcobol="${gcobol:-${0%/*}/gcobol}" + +# If invoked as "gcobcx", execute with tracing enabled. +if [ gcobcx = ${0##*/} ] +then + #cho "parsing args: $@" >&2 + set -x +fi if [ "$COBCPY" ] then @@ -78,6 +88,7 @@ fi exit_status=0 skip_arg= opts="$copydir $includes" +cflags=-fPIC mode=-shared incomparable="has no comparable gcobol option" @@ -163,6 +174,8 @@ do continue fi + #cho "next arg: $opt" >&2 + case $opt in # pass next parameter to GCC @@ -189,9 +202,9 @@ do -D) pending_arg=$opt ;; - -E) opts="$opts $opt -fsyntax-only" - ;; - -echo) echo="echo" + -E) opts="$opts $opt -fsyntax-only" + ;; + -echo) echo="echo" ;; -fec=* | -fno-ec=*) @@ -400,11 +413,24 @@ do ;; -i | --info) warn "$opt" ;; - -I) pending_arg=$opt + -include ) + pending_arg=$opt + ;; + # no-space version: just concatenate + -include* ) + opts="$opts $opt" + ;; + # + # Options that may have a space before the argument, or not + # + -I | -L | -MF | -MT ) + pending_arg=$opt ;; - -I*) + # no-space version: just concatenate + -I* | -L* | -MF* | -MT* ) opts="$opts $opt" ;; + -fimplicit-init) warn "$opt" ;; -j | -job) warn "$opt" @@ -414,8 +440,6 @@ do -K*) warn "$opt" ;; # -l - -L) pending_arg=$opt - ;; --list*) warn "$opt" ;; -m) mode="-shared" @@ -464,7 +488,13 @@ do -v | --verbose) opts="$opts -V" ;; # note: we want -dumpversion to be passed to gcc - -V | --version | -version) opts="$opts --version" + -V | --version | -version) + $gcobol --version | + awk '1 == NR { ver = $3; + $3 = "3.1" + $5 = "(GnuCOBOL emulation using " ver ")" } 1' + exit + # opts="$opts --version" ;; # pass through, strangely -Wall is not supported -w | -W | -Wextra) opts="$opts $opt" @@ -472,10 +502,11 @@ do -Wno-*) no_warn "$opt" ;; - -W*) ignore_arg "$opt" + -W*) warn "$opt" ;; -x) mode= + cflags= ;; -) output_name=a.out # nonnull to prevent overriding gcc default @@ -491,10 +522,9 @@ do ;; -shared) output_name="$output_name".so - opts="$opts -fPIC" ;; esac - opts="$opts -o $output_name" + opts="$opts $cflags -o $output_name" fi opts="$opts $opt" # pass through ;; @@ -504,7 +534,7 @@ done # cobc default: if [ "$static_used" = "" ] then - opts="$opts -fno-static-call"; + opts="-fno-static-call $opts"; fi if [ "$exit_status" -gt 0 ] @@ -512,9 +542,6 @@ then exit $exit_status fi -# To override the default gcobol, set the "gcobol" environment variable. -gcobol="${gcobol:-${0%/*}/gcobol}" - if [ "$dialect" ] then dialect=$(echo $dialect | sed -E 's/[[:alnum:]]+/-dialect &/g') diff --git a/gcc/cobol/gcobol.1 b/gcc/cobol/gcobol.1 index 432977cf5cfe..17b02795e3c6 100644 --- a/gcc/cobol/gcobol.1 +++ b/gcc/cobol/gcobol.1 @@ -1,4 +1,4 @@ - .ds lang COBOL +.ds lang COBOL .ds gcobol GCC\ \*[lang]\ Front-end .ds isostd ISO/IEC 1989:2023 .Dd \& February 2025 @@ -506,7 +506,9 @@ features may be enabled as a warning, or error, or suppressed. .It Fl Wno-apply-commit Warn if APPLY COMMIT is used. .It Fl Wno-bad-line-directive -Warn if malformed %<#line%> directive is encountered. +Warn if malformed +.Ql #line +directive is encountered. .It Fl Wno-binary-long-long Warn if BINARY-LONG-LONG is used. .It Fl Wno-call_giving @@ -1456,54 +1458,54 @@ others. They are listed alphabetically below. .It ABS ACOS ANNUITY ASIN ATAN .It -BASECONVERT BIT-OF BIT-TO-CHAR BOOLEAN-OF-INTEGER BYTE-LENGTH +BASECONVERT \%BIT-OF \%BIT-TO-CHAR \%BOOLEAN-OF-INTEGER \%BYTE-LENGTH .It -CHAR CHAR-NATIONAL COMBINED-DATETIME CONCAT CONVERT COS CURRENT-DATE +CHAR \%CHAR-NATIONAL \%COMBINED-DATETIME CONCAT CONVERT COS \%CURRENT-DATE .It -DATE-OF-INTEGER DATE-TO-YYYYMMDD DAY-OF-INTEGER DAY-TO-YYYYDDD DISPLAY-OF +\%DATE-OF-INTEGER \%DATE-TO-YYYYMMDD \%DAY-OF-INTEGER \%DAY-TO-YYYYDDD \%DISPLAY-OF .It -E EXCEPTION-FILE -EXCEPTION-FILE-N EXCEPTION-LOCATION EXCEPTION-LOCATION-N -EXCEPTION-STATEMENT EXCEPTION-STATUS EXP EXP10 +E \%EXCEPTION-FILE +\%EXCEPTION-FILE-N \%EXCEPTION-LOCATION \%EXCEPTION-LOCATION-N +\%EXCEPTION-STATEMENT \%EXCEPTION-STATUS EXP EXP10 .It -FACTORIAL FIND-STRING -FORMATTED-CURRENT-DATE FORMATTED-DATE FORMATTED-DATETIME -FORMATTED-TIME FRACTION-PART +FACTORIAL \%FIND-STRING +\%FORMATTED-CURRENT-DATE \%FORMATTED-DATE \%FORMATTED-DATETIME +\%FORMATTED-TIME \%FRACTION-PART .It -HEX-OF HEX-TO-CHAR HIGHEST-ALGEBRAIC +\%HEX-OF \%HEX-TO-CHAR \%HIGHEST-ALGEBRAIC .It -INTEGER INTEGER-OF-BOOLEAN INTEGER-OF-DATE INTEGER-OF-DAY -INTEGER-OF-FORMATTED-DATE INTEGER-PART +INTEGER \%INTEGER-OF-BOOLEAN \%INTEGER-OF-DATE \%INTEGER-OF-DAY +\%INTEGER-OF-FORMATTED-DATE \%INTEGER-PART .It -LENGTH LOCALE-COMPARE -LOCALE-DATE LOCALE-TIME LOCALE-TIME-FROM-SECONDS LOG LOG10 LOWER-CASE -LOWEST-ALGEBRAIC +LENGTH \%LOCALE-COMPARE +\%LOCALE-DATE \%LOCALE-TIME \%LOCALE-TIME-FROM-SECONDS LOG LOG10 \%LOWER-CASE +\%LOWEST-ALGEBRAIC .It -MAX MEAN MEDIAN MIDRANGE MIN MOD MODULE-NAME +MAX MEAN MEDIAN MIDRANGE MIN MOD \%MODULE-NAME .It -NATIONAL-OF NUMVAL NUMVAL-C NUMVAL-F ORD +\%NATIONAL-OF NUMVAL \%NUMVAL-C \%NUMVAL-F ORD .It -ORD-MAX ORD-MIN +\%ORD-MAX \%ORD-MIN .It -PI PRESENT-VALUE +PI \%PRESENT-VALUE .It RANDOM RANGE REM REVERSE .It -SECONDS-FROM-FORMATTED-TIME -SECONDS-PAST-MIDNIGHT SIGN SIN SMALLEST-ALGEBRAIC SQRT -STANDARD-COMPARE STANDARD-DEVIATION SUBSTITUTE SUM +\%SECONDS-FROM-FORMATTED-TIME +\%SECONDS-PAST-MIDNIGHT SIGN SIN \%SMALLEST-ALGEBRAIC SQRT +\%STANDARD-COMPARE \%STANDARD-DEVIATION SUBSTITUTE SUM .It -TAN TEST-DATE-YYYYMMDD TEST-DAY-YYYYDDD TEST-FORMATTED-DATETIME -TEST-NUMVAL TEST-NUMVAL-C TEST-NUMVAL-F TRIM +TAN \%TEST-DATE-YYYYMMDD \%TEST-DAY-YYYYDDD \%TEST-FORMATTED-DATETIME +\%TEST-NUMVAL \%TEST-NUMVAL-C \%TEST-NUMVAL-F TRIM .It -ULENGTH UPOS UPPER-CASE +ULENGTH UPOS \%UPPER-CASE USUBSTR USUPPLEMENTARY UUID4 UVALID UWIDTH .It VARIANCE .It -WHEN-COMPILED +\%WHEN-COMPILED .It -YEAR-TO-YYYY +\%YEAR-TO-YYYY .El . .Ss Binary floating point DISPLAY diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc index dcf49c7a90e1..01ea8cd5e013 100644 --- a/gcc/cobol/genapi.cc +++ b/gcc/cobol/genapi.cc @@ -1202,6 +1202,56 @@ parser_statement_begin( const cbl_name_t statement_name, sv_is_i_o = false; } +void +parser_statement_end( const std::list<cbl_field_t*>&flist) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + char *psz = xasprintf(" List has %ld elements", flist.size()); + SHOW_PARSE_TEXT(psz); + free(psz); + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + char *psz = xasprintf(" List has %ld elements", flist.size()); + TRACE1_TEXT(psz); + free(psz); + TRACE1_END + } + if( flist.size() ) + { + for( auto field : flist ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + char *psz = xasprintf("Deallocating %s", field->name); + SHOW_PARSE_TEXT(psz); + free(psz); + } + TRACE1 + { + TRACE1_INDENT + char *psz = xasprintf(" Deallocating %s", field->name); + TRACE1_TEXT(psz); + free(psz); + } + + gg_free(member(field->var_decl_node, "data")); + // Flag this guy as free: + gg_assign(member(field->var_decl_node, "data"), gg_cast(UCHAR_P, null_pointer_node)); + gg_assign(member(field->var_decl_node, "allocated"), gg_cast(SIZE_T, integer_zero_node)); + } + TRACE1 + { + TRACE1_END + } + } + } + static void initialize_variable_internal( cbl_refer_t refer, bool explicitly=false, @@ -1221,7 +1271,9 @@ initialize_variable_internal( cbl_refer_t refer, return; } - if( parsed_var->attr & register_e ) + if( parsed_var->attr & register_e + || ( parsed_var->attr & intermediate_e + && parsed_var->type == FldAlphanumeric) ) { return; } @@ -2138,7 +2190,7 @@ compare_binary_binary(tree return_int, // Let's check for the simplified case where both left and right sides are // little-endian binary values: - + if( is_pure_integer(left_side_ref->field) && is_pure_integer(right_side_ref->field) ) { @@ -3951,7 +4003,7 @@ public: } label_verify; void -parser_end_program(const char *prog_name ) +parser_end_program(const char *prog_name ) { if( gg_trans_unit.function_stack.size() ) { @@ -4096,7 +4148,7 @@ parser_init_list() "__gg__variables_to_init", gg_get_address_of(array), wsclear() ? build_string_literal( - 1, + 1, reinterpret_cast<const char *>(wsclear())) : null_pointer_node, NULL_TREE); @@ -6359,8 +6411,8 @@ void parser_sleep(const cbl_refer_t &seconds) } void -parser_exit_program(void) // exits back to COBOL only, else continue - { +parser_exit_program() + { // exits back to COBOL only, else continue static cbl_label_t this_program = {}; static cbl_refer_t magic_refer(&this_program, false); parser_exit( magic_refer ); @@ -6374,7 +6426,8 @@ parser_exit_program(void) // exits back to COBOL only, else continue static void -program_end_stuff(cbl_refer_t refer, ec_type_t ec) +program_end_stuff(cbl_refer_t refer, + ec_type_t ec) { // This is the moral equivalent of a C "return xyz;". @@ -6462,7 +6515,8 @@ program_end_stuff(cbl_refer_t refer, ec_type_t ec) } void -parser_exit( const cbl_refer_t& refer, ec_type_t ec ) +parser_exit( const cbl_refer_t& refer, + ec_type_t ec ) { Analyze(); SHOW_PARSE @@ -8435,7 +8489,7 @@ parser_perform_conditional( struct cbl_perform_tgt_t *tgt ) // The next instructions that the parser will give us are the conditional // calculation, so the first thing that goes down is the condover: - /* The following NOP is needed to make NEXT OVER PERFORM BEFORE/AFTER UNTIL + /* The following NOP is needed to make NEXT OVER PERFORM BEFORE/AFTER UNTIL behaves properly. */ insert_nop(106); gg_append_statement(tgt->addresses.condover[i].go_to); @@ -8516,7 +8570,7 @@ perform_outofline_before_until(struct cbl_perform_tgt_t *tgt, create_iline_address_pairs(tgt); // Tag the top of the perform - + gg_append_statement(tgt->addresses.top.label); // Go do the conditional calculation: @@ -9776,7 +9830,7 @@ parser_file_add(struct cbl_file_t *file) } if( varies.max < symbol_file_record(file)->data.capacity()) { - const charmap_t *charmap = + const charmap_t *charmap = __gg__get_charmap(current_encoding(display_encoding_e)); varies.min *= charmap->stride(); varies.max *= charmap->stride(); @@ -15911,7 +15965,7 @@ mh_source_is_literalA(const cbl_refer_t &destref, if( ( destref.field->type == FldAlphanumeric || destref.field->type == FldGroup ) && !(destref.field->attr & any_length_e) - && !sourceref.all + && !sourceref.all && !size_error) { // A simple alpha-to-alpha move is possible @@ -16733,12 +16787,20 @@ psa_new_var_decl(cbl_field_t *new_var, const char *external_record_base) && new_var->type != FldLiteralA && new_var->type != FldLiteralN ) { -// new_var_decl = gg_define_variable( cblc_field_type_node, -// base_name, -// vs_static); + gg_variable_scope_t scope = vs_stack; + if( new_var->type == FldAlphanumeric ) + { + // This has to be static, because we are putting the actual memory + // on the heap. But if we put the cblc_field_t on the stack inside + // of a condition, or in a loop, we just keep recreating the field + // without getting freeing the memory. Eventually, with perhaps a + // two-pass compiler, we'll be able to create the stack cblc_field_t + // once per program-id. + scope = vs_static; + } new_var_decl = gg_define_variable( cblc_field_type_node, base_name, - vs_stack); + scope); SET_DECL_MODE(new_var_decl, BLKmode); } else @@ -16930,7 +16992,9 @@ parser_symbol_add(struct cbl_field_t *new_var ) || new_var->type == FldLiteralA ) { - if( new_var->data.initial && new_var->data.capacity() ) + if( new_var->data.initial + && new_var->data.capacity() + && !(new_var->attr & intermediate_e) ) { SHOW_PARSE_INDENT for(size_t i=0; i<new_var->data.capacity(); i++) @@ -16962,29 +17026,7 @@ parser_symbol_add(struct cbl_field_t *new_var ) if( new_var->var_decl_node ) { - if( new_var->type != FldConditional ) - { - // There is a possibility when re-using variables that a temporary that - // was created at compile time might not have a data pointer at run time. - if( new_var->attr & (intermediate_e) ) - { - IF( member(new_var->var_decl_node, "allocated"), - lt_op, - member(new_var->var_decl_node, "capacity") ) - { - gg_free(member(new_var, "data")); - gg_assign(member(new_var, "data"), - gg_cast(UCHAR_P, gg_malloc(new_var->data.capacity()))); - gg_assign(member(new_var, "allocated"), - build_int_cst_type(SIZE_T, new_var->data.capacity())); - } - ELSE - { - } - ENDIF - } - } - else + if( new_var->type == FldConditional ) { gg_assign(new_var->var_decl_node, boolean_false_node); } @@ -17310,7 +17352,9 @@ parser_symbol_add(struct cbl_field_t *new_var ) * 4. cbl_field_data_t::capacity is 0 because it requires no working storage */ - if( new_var->data.capacity() == 0 + if( new_var->data.capacity() == 0 + && !( new_var->type == FldAlphanumeric + && new_var->attr & intermediate_e) && new_var->level != 88 && new_var->type != FldClass && new_var->type != FldLiteralN @@ -17382,7 +17426,7 @@ parser_symbol_add(struct cbl_field_t *new_var ) bytes_to_allocate = 1; } - if( !bytes_to_allocate ) + if( !bytes_to_allocate && !(new_var->attr & intermediate_e) ) { cbl_internal_error( "%<bytes_to_allocate%> is zero for %s (symbol number " HOST_SIZE_T_PRINT_DEC ")", @@ -17408,49 +17452,60 @@ parser_symbol_add(struct cbl_field_t *new_var ) } } - if( bytes_to_allocate ) + if( new_var->attr & intermediate_e + && new_var->type == FldAlphanumeric ) { - // We need a unique name for the allocated data for this COBOL variable: - char achDataName[256]; - if( new_var->attr & external_e ) - { - sprintf(achDataName, "%s", new_var->name); - } - else if( new_var->name[0] == '_' ) - { - // Avoid doubling up on leading underscore - sprintf(achDataName, - "%s_data_" HOST_SIZE_T_PRINT_UNSIGNED, - new_var->name, - (fmt_size_t)sv_data_name_counter++); - } - else + // We don't allocate here for intermediates. We instead use + // malloc() in the library when a run-time value is assigned to this + // variable + data_area = null_pointer_node; + } + else + { + if( bytes_to_allocate ) { - sprintf(achDataName, - "_%s_data_" HOST_SIZE_T_PRINT_UNSIGNED, - new_var->name, - (fmt_size_t)sv_data_name_counter++); - } + // We need a unique name for the allocated data for this COBOL variable: + char achDataName[256]; + if( new_var->attr & external_e ) + { + sprintf(achDataName, "%s", new_var->name); + } + else if( new_var->name[0] == '_' ) + { + // Avoid doubling up on leading underscore + sprintf(achDataName, + "%s_data_" HOST_SIZE_T_PRINT_UNSIGNED, + new_var->name, + (fmt_size_t)sv_data_name_counter++); + } + else + { + sprintf(achDataName, + "_%s_data_" HOST_SIZE_T_PRINT_UNSIGNED, + new_var->name, + (fmt_size_t)sv_data_name_counter++); + } - if( new_var->attr & external_e ) - { - tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); - new_var->data_decl_node = gg_define_variable( - array_type, - achDataName, - vs_external); - data_area = gg_get_address_of(new_var->data_decl_node); - } - else - { - gg_variable_scope_t vs_scope = (new_var->attr & intermediate_e) - ? vs_stack : vs_static ; - tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); - new_var->data_decl_node = gg_define_variable( - array_type, - achDataName, - vs_scope); - data_area = gg_get_address_of(new_var->data_decl_node); + if( new_var->attr & external_e ) + { + tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_external); + data_area = gg_get_address_of(new_var->data_decl_node); + } + else + { + gg_variable_scope_t vs_scope = (new_var->attr & intermediate_e) + ? vs_stack : vs_static ; + tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_scope); + data_area = gg_get_address_of(new_var->data_decl_node); + } } } } diff --git a/gcc/cobol/genapi.h b/gcc/cobol/genapi.h index 1ae7d96bfbd2..6bba662f2062 100644 --- a/gcc/cobol/genapi.h +++ b/gcc/cobol/genapi.h @@ -272,15 +272,16 @@ parser_see_stop_run( struct cbl_refer_t exit_status, const char name[] ); void parser_program_hierarchy( const struct cbl_prog_hier_t& hier ); void -parser_end_program(const char *name=NULL); +parser_end_program(const char *name); void parser_sleep(const cbl_refer_t &seconds); -void parser_exit( const cbl_refer_t& refer, ec_type_t = ec_none_e ); +void parser_exit( const cbl_refer_t& refer, + ec_type_t = ec_none_e ); void parser_exit_section(void); void parser_exit_paragraph(void); void parser_exit_perform( struct cbl_perform_tgt_t *tgt, bool cycle ); -void parser_exit_program(void); // exits back to COBOL only, else continue +void parser_exit_program(); // exits back to COBOL only, else continue void parser_exhibit( bool changed, bool named, @@ -618,6 +619,7 @@ void parser_init_list(); tree file_static_variable(tree type, const char *name); void parser_statement_begin( const cbl_name_t name, tree ecs, tree dcls ); +void parser_statement_end( const std::list<cbl_field_t*>& ); tree parser_compile_ecs( const std::vector<uint64_t>& ecs ); tree parser_compile_dcls( const std::vector<uint64_t>& dcls ); diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc index fe4343e5d1dd..48fc8cfe6bba 100644 --- a/gcc/cobol/lexio.cc +++ b/gcc/cobol/lexio.cc @@ -36,6 +36,8 @@ #include "copybook.h" #include "lexio.h" +#include <iostream> + extern int yy_flex_debug; source_format_t& cdf_source_format(); @@ -814,8 +816,8 @@ static std::pair<std::list<replace_t>, char *> parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { std::list<replace_t> pairs ; - static const char any_ch[] = "."; - static const char word_ch[] = "[[:alnum:]$_-]"; + static const char any_ch[] = ""; + //// const char word_ch[] = "[[:alnum:]$_-]"; static const char nonword_ch[] = "[^[:alnum:]\"'$_-]"; // Pattern to find one REPLACE pseudo-text pair @@ -878,10 +880,10 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { if( parsed.leading_trailing.size() > 0 ) { switch( TOUPPER(parsed.leading_trailing.p[0]) ) { case 'L': // leading - befter[1] = word_ch; + befter[1] = any_ch; break; case 'T': // trailing - befter[0] = word_ch; + befter[0] = any_ch; break; default: gcc_unreachable(); @@ -1513,21 +1515,26 @@ cdftext::lex_open( const char filename[] ) { if( input == -1 ) return NULL; int output = open_output(); - + size_t n =0; + // Process any files supplied by the -include command-line option. for( auto name : included_files ) { + int input; // cppcheck-suppress shadowVariable if( -1 == (input = open(name, O_RDONLY)) ) { cbl_message(LexIncludeE, "cannot open %<-include%> file %qs", name); continue; } + dbgmsg("lex_open: including %zu of %zu: '%s'", ++n, included_files.size(), name); cobol_filename(name, inode_of(input)); filespan_t mfile( free_form_reference_format( input ) ); process_file( mfile, output ); + dbgmsg("lex_open: processed %zu of %zu: '%s'", n, included_files.size(), name); cobol_filename_restore(); // process_file restores only for COPY } included_files.clear(); + dbgmsg("lex_open: '%s'", filename); cobol_filename(filename, inode_of(input)); filespan_t mfile( free_form_reference_format( input ) ); @@ -1898,6 +1905,12 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { // parse CDF directives while( mfile.next_line() ) { + if( false ) { + std::string line( mfile.ccur(), const_cast<const char *>(mfile.eol) ); + std::cerr << __func__ << ": " + << mfile.lineno() << ":" << mfile.colno() << ": " + << line; + } yylloc = mfile.as_location(); auto copied = parse_copy_directive(mfile); if( copied.parsed && copied.fd != -1 ) { diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y index 875f7cfa23dc..3ab0daa4c18d 100644 --- a/gcc/cobol/parse.y +++ b/gcc/cobol/parse.y @@ -727,7 +727,7 @@ class locale_tgt_t { %type <field> log_term rel_expr rel_abbr eval_abbr %type <refer> num_value num_term value factor -%type <refer> simple_cond bool_expr +%type <refer> simple_cond bool_expr until_expr %type <log_expr_t> log_expr rel_abbrs eval_abbrs %type <rel_term_t> rel_term rel_term1 @@ -4152,7 +4152,7 @@ data_descr1: level_name // Check COMP-5 capacity // No capacity means no PICTURE, valid only for a (potential) group - if( $field->type == FldNumericBin5 ) { + if( is_among( $field->type, {FldNumericBinary, FldNumericBin5} ) ) { if( $field->data.capacity() == 0 ) { if( has_clause ($data_clauses, usage_clause_e) && !has_clause ($data_clauses, picture_clause_e) ) { @@ -4567,11 +4567,11 @@ picture_clause: PIC signed nps[fore] nines nps[aft] } assert(0 < $nchar); field->data.picture = nullptr; - auto nchar = std::min($nchar, MAXIMUM_ALPHA_LENGTH); - if( nchar < $nchar ) { + auto nchar = std::min(size_t($nchar), MAXIMUM_ALPHA_LENGTH); + if( nchar < size_t($nchar) ) { error_msg(@2, "alphanumeric data-item size (%d) " - "exceeds maximum of %d bytes", - $nchar, MAXIMUM_ALPHA_LENGTH); + "exceeds maximum of %lu bytes", + $nchar, (unsigned long)MAXIMUM_ALPHA_LENGTH); } field->set_initial(nchar, @nchar); } @@ -4898,7 +4898,7 @@ value_clause: VALUE all LITERAL[lit] { } } } - | VALUE all cce_expr[cce] { + | VALUE all const_value[cce] { /* * cce has two parts: * cce.r) Host binary value @@ -4950,7 +4950,11 @@ value_clause: VALUE all LITERAL[lit] { } | VALUE error { - error_msg(@2, "invalid VALUE"); + if( 0 < yychar ) { + error_msg(@2, "invalid VALUE at %qs", keyword_str(yychar)); + } else { + error_msg(@2, "invalid VALUE"); + } } ; @@ -5391,8 +5395,14 @@ sentence: statements '.' } ; -statements: statement { $$ = $1; } - | statements statement { $$ = $2; } +statements: statement { + $$ = $1; + parser_statement_end( symbol_temporary_alphanumerics() ); + } + | statements statement { + $$ = $2; + parser_statement_end( symbol_temporary_alphanumerics() ); + } ; statement: error { @@ -6383,8 +6393,8 @@ simple_cond: kind_of_name $$ = new_reference(new_temporary(FldConditional)); parser_relop($$->field, lhs, eq_op, rhs); } - | expr NOT OMITTED - { + | expr /* IS */ NOT OMITTED + { // IS captured by lexer auto lhs = cbl_refer_t($expr->field); lhs.addr_of = true; auto rhs = cbl_field_of(symbol_field(0,0, "NULLS")); @@ -6424,6 +6434,13 @@ kind_of_name: expr might_be variable_type } ; +until_expr: bool_expr + | EXIT { + auto e = symbol_at(very_true_register()); + $$ = new_reference(cbl_field_of(e)); + } + ; + bool_expr: log_expr { $$ = new_reference($1->resolve()); } ; @@ -7992,15 +8009,15 @@ perform_until: test_before perform_cond } ; perform_cond: UNTIL { parser_perform_conditional( &perform_current()->tgt); } - bool_expr + until_expr[expr] { parser_perform_conditional_end( &perform_current()->tgt); - if( !is_conditional($bool_expr) ) { + if( !is_conditional($expr) ) { error_msg(@1, "%s is not a condition expression", - name_of($bool_expr->field)); + name_of($expr->field)); YYERROR; } - $$ = $bool_expr->cond(); + $$ = $expr->cond(); } ; @@ -10238,6 +10255,13 @@ ffi_by_ref: scalar_arg[refer] cbl_refer_t *r = new_reference(new_literal(@1, $1, quoted_e)); $$ = new cbl_ffi_arg_t(by_content_e, r); } + | num_literal + { + cbl_message(@1, MfCallLiteral, + "cannot pass %qs BY REFERENCE", $1->data.initial); + cbl_refer_t *r = new_reference($1); + $$ = new cbl_ffi_arg_t(by_content_e, r); + } | ADDRESS OF scalar_arg[refer] { $$ = new cbl_ffi_arg_t(by_reference_e, $refer, address_of_e); @@ -10840,9 +10864,7 @@ intrinsic: function_udf keyword_str($1), (long)(p - args.data()), name_of(p->field) ); YYERROR; } - $$ = is_numeric(args[0].field)? - new_tempnumeric_float() : - new_alphanumeric(); + $$ = intrinsic_return_field($1, args); $$->data.initial = keyword_str($1); parser_intrinsic_callv( $$, intrinsic_cname($1), args.size(), args.data() ); @@ -10870,7 +10892,7 @@ intrinsic: function_udf | BASECONVERT '(' varg[r1] varg[r2] varg[r3] ')' { location_set(@1); - $$ = new_tempnumeric("BASECONVERT"); + $$ = new_alphanumeric("BASECONVERT"); cbl_unimplemented("BASECONVERT"); if( ! intrinsic_call_3($$, BASECONVERT, $r1, $r2, $r3 )) YYERROR; } @@ -10881,7 +10903,7 @@ intrinsic: function_udf } | CHAR '(' expr[r1] ')' { location_set(@1); - $$ = new_alphanumeric(1,"CHAR"); + $$ = new_alphanumeric("CHAR"); if( ! intrinsic_call_1($$, CHAR, $r1, @r1)) YYERROR; } /* convert formulations: @@ -10965,7 +10987,7 @@ intrinsic: function_udf | FORMATTED_DATE '(' DATE_FMT[r1] expr[r2] ')' { location_set(@1); - $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATE, "FORMATTED-DATE"); + $$ = new_alphanumeric("FORMATTED-DATE"); auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); symbol_temporary_location(r1->field, @r1); if( ! intrinsic_call_2($$, FORMATTED_DATE, r1, $r2) ) YYERROR; @@ -10975,7 +10997,7 @@ intrinsic: function_udf | FORMATTED_DATETIME '(' DATETIME_FMT[r1] expr[r2] expr[r3] ')' { location_set(@1); - $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-DATETIME"); + $$ = new_alphanumeric("FORMATTED-DATETIME"); auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); symbol_temporary_location(r1->field, @r1); static cbl_refer_t r3(literally_zero); @@ -10985,7 +11007,7 @@ intrinsic: function_udf | FORMATTED_DATETIME '(' DATETIME_FMT[r1] expr[r2] expr[r3] expr[r4] ')' { location_set(@1); - $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-DATETIME"); + $$ = new_alphanumeric("FORMATTED-DATETIME"); auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); symbol_temporary_location(r1->field, @r1); if( ! intrinsic_call_4($$, FORMATTED_DATETIME, @@ -10997,7 +11019,7 @@ intrinsic: function_udf | FORMATTED_TIME '(' TIME_FMT[r1] expr[r2] expr[r3] ')' { location_set(@1); - $$ = new_alphanumeric(MAXLENGTH_FORMATTED_TIME, "FORMATTED-DATETIME"); + $$ = new_alphanumeric("FORMATTED-DATETIME"); auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); symbol_temporary_location(r1->field, @r1); if( ! intrinsic_call_3($$, FORMATTED_TIME, @@ -11005,7 +11027,7 @@ intrinsic: function_udf } | FORMATTED_TIME '(' TIME_FMT[r1] expr[r2] ')' { location_set(@1); - $$ = new_alphanumeric(MAXLENGTH_FORMATTED_TIME, "FORMATTED-TIME"); + $$ = new_alphanumeric("FORMATTED-TIME"); auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); auto r3 = new_reference(new_constant("0")); symbol_temporary_location(r1->field, @r1); @@ -11014,7 +11036,7 @@ intrinsic: function_udf } | FORMATTED_CURRENT_DATE '(' DATETIME_FMT[r1] ')' { location_set(@1); - $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-CURRENT_DATE"); + $$ = new_alphanumeric("FORMATTED-CURRENT_DATE"); auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); symbol_temporary_location(r1->field, @r1); if( ! intrinsic_call_1($$, FORMATTED_CURRENT_DATE, r1, @r1) ) @@ -11099,13 +11121,13 @@ intrinsic: function_udf } | lopper_case[func] '(' alpha_val[r1] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity(), "lopper_case[func]"); + $$ = new_alphanumeric("lopper_case[func]"); if( ! intrinsic_call_1($$, $func, $r1, @r1)) YYERROR; } | MODULE_NAME '(' module_type[type] ')' { - $$ = new_alphanumeric(sizeof(cbl_name_t), "MODULE-NAME"); + $$ = new_alphanumeric("MODULE-NAME"); parser_module_name( $$, $type ); } @@ -11266,7 +11288,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_constant("1") ); static auto four = new cbl_refer_t( new_constant("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); + auto r3 = new_reference(new_alphanumeric()); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -11282,7 +11304,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_constant("1") ); static auto four = new cbl_refer_t( new_constant("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); + auto r3 = new_reference(new_alphanumeric()); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -11308,7 +11330,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_constant("1") ); static auto four = new cbl_refer_t( new_constant("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); + auto r3 = new_reference(new_alphanumeric()); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -11324,7 +11346,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_constant("1") ); static auto four = new cbl_refer_t( new_constant("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); + auto r3 = new_reference(new_alphanumeric()); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -11350,7 +11372,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_constant("1") ); static auto four = new cbl_refer_t( new_constant("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); + auto r3 = new_reference(new_alphanumeric()); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -11366,7 +11388,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_constant("1") ); static auto four = new cbl_refer_t( new_constant("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); + auto r3 = new_reference(new_alphanumeric()); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -11531,7 +11553,7 @@ trim_trailing: %empty { $$ = new_constant("0"); } // Remove both intrinsic0: CURRENT_DATE { location_set(@1); - $$ = new_alphanumeric(MAXLENGTH_CALENDAR_DATE, "CURRENT-DATE"); + $$ = new_alphanumeric("CURRENT-DATE"); parser_intrinsic_call_0( $$, "__gg__current_date" ); } | E { @@ -11574,7 +11596,7 @@ intrinsic0: CURRENT_DATE { | PI { location_set(@1); - $$ = new_tempnumeric_float("PI"); + $$ = new_tempnumeric("PI"); parser_intrinsic_call_0( $$, "__gg__pi" ); } | SECONDS_PAST_MIDNIGHT { @@ -11590,7 +11612,7 @@ intrinsic0: CURRENT_DATE { | WHEN_COMPILED { location_set(@1); // Returns YYYYMMDDhhmmssss-0500) - $$ = new_alphanumeric(MAXLENGTH_CALENDAR_DATE, "WHEN-COMPILED"); + $$ = new_alphanumeric("WHEN-COMPILED"); parser_intrinsic_call_0( $$, "__gg__when_compiled" ); } ; diff --git a/gcc/cobol/parse_ante.h b/gcc/cobol/parse_ante.h index a89237de9885..068edc275ee0 100644 --- a/gcc/cobol/parse_ante.h +++ b/gcc/cobol/parse_ante.h @@ -39,11 +39,6 @@ #include <stack> #include <string> -#define MAXLENGTH_FORMATTED_DATE (10*4) -#define MAXLENGTH_FORMATTED_TIME (19*4) -#define MAXLENGTH_CALENDAR_DATE (21*4) -#define MAXLENGTH_FORMATTED_DATETIME (30*4) - #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-field-initializers" @@ -224,16 +219,6 @@ namcpy(const YYLTYPE& loc, cbl_name_t tgt, const char *src ) { return true; } -cbl_field_t * -new_alphanumeric( size_t capacity = MAXIMUM_ALPHA_LENGTH, - const cbl_name_t name = nullptr, - cbl_encoding_t encoding = no_encoding_e ); - -static inline cbl_field_t * -new_alphanumeric( const cbl_name_t name, cbl_encoding_t encoding = no_encoding_e ) { - return new_alphanumeric(MAXIMUM_ALPHA_LENGTH, name, encoding); -} - static inline cbl_refer_t * new_reference( enum cbl_field_type_t type, const char *initial ) { return new cbl_refer_t( new_temporary(type, initial) ); @@ -588,6 +573,8 @@ static bool ast_multiply( arith_t *arith ); static bool ast_divide( arith_t *arith ); static cbl_field_type_t intrinsic_return_type( int token ); +static cbl_field_t *intrinsic_return_field( int token, + std::vector<cbl_refer_t> ); template <typename T> static T* use_any( list<T>& src, T *tgt) { @@ -1220,10 +1207,14 @@ struct ffi_args_t { void dump() const { int i=0; for( const auto& arg : elems ) { - dbgmsg( "%8d) %-10s %-16s %s", i++, - cbl_ffi_crv_str(arg.crv), - 3 + cbl_field_type_str(arg.refer.field->type), - arg.refer.field->pretty_name() ); + if( arg.refer.field ) { + dbgmsg( "%8d) %-10s %-16s %s", i++, + cbl_ffi_crv_str(arg.crv), + 3 + cbl_field_type_str(arg.refer.field->type), + arg.refer.field->pretty_name() ); + } else { + dbgmsg( "%8d) %-10s [omitted]", i++, cbl_ffi_crv_str(arg.crv) ); + } } } @@ -3710,4 +3701,12 @@ static void ast_first_statement( const YYLTYPE& loc ) { } } +template <typename V> +bool is_among( V value, const std::list<V>& container ) { + return std::any_of( container.begin(), container.end(), + [value]( const auto& elem ) { + return value == elem; + } ); +} + #pragma GCC diagnostic push diff --git a/gcc/cobol/parse_util.h b/gcc/cobol/parse_util.h index 463ade3242c9..e3bcc79a68f9 100644 --- a/gcc/cobol/parse_util.h +++ b/gcc/cobol/parse_util.h @@ -45,41 +45,47 @@ * X Alphanumeric * n variadic * We use just A, I, N, or X, choosing the most general for each parameter. + * + * When FldInvalid is shown as the return type, it indicates that the type + * of the function is determined by the type of the first parameter. + * + * We use FldNumericBin5 for functions of type "Integer", and FldFloat for + * functions of type "Numeric", */ static const function_descr_t function_descrs[] = { { ABS, "ABS", - "__gg__abs", "N", {}, FldNumericBin5 }, + "__gg__abs", "N", {}, FldInvalid }, { ACOS, "ACOS", - "__gg__acos", "N", {}, FldNumericBin5 }, + "__gg__acos", "N", {}, FldFloat }, { ANNUITY, "ANNUITY", - "__gg__annuity", "NI", {}, FldNumericBin5 }, + "__gg__annuity", "NI", {}, FldFloat }, { ASIN, "ASIN", - "__gg__asin", "N", {}, FldNumericBin5 }, + "__gg__asin", "N", {}, FldFloat }, { ATAN, "ATAN", - "__gg__atan", "N", {}, FldNumericBin5 }, - { BASECONVERT, "BASECONVERT", - "__gg__baseconvert", "XII", {}, FldNumericBin5 }, + "__gg__atan", "N", {}, FldFloat }, + { BASECONVERT, "BASECONVERT", + "__gg__baseconvert", "XII", {}, FldAlphanumeric }, { BIT_OF, "BIT-OF", "__gg__bit_of", "X", {}, FldAlphanumeric }, { BIT_TO_CHAR, "BIT-TO-CHAR", "__gg__bit_to_char", "X", {}, FldAlphanumeric }, // BOOLEAN-OF-INTEGER requires FldBoolean - { BOOLEAN_OF_INTEGER, "BOOLEAN-OF-INTEGER", - "__gg__boolean_of_integer", "II", {}, FldNumericBin5 }, + { BOOLEAN_OF_INTEGER, "BOOLEAN-OF-INTEGER", + "__gg__boolean_of_integer", "II", {}, FldNumericBin5 }, { BYTE_LENGTH, "BYTE-LENGTH", "__gg__byte_length", "X", {}, FldNumericBin5 }, { CHAR, "CHAR", "__gg__char", "I", {}, FldAlphanumeric }, - { CHAR_NATIONAL, "CHAR-NATIONAL", - "__gg__char_national", "I", {}, FldAlphanumeric }, + { CHAR_NATIONAL, "CHAR-NATIONAL", + "__gg__char_national", "I", {}, FldAlphanumeric }, { COMBINED_DATETIME, "COMBINED-DATETIME", - "__gg__combined_datetime", "IN", {}, FldNumericBin5 }, + "__gg__combined_datetime", "IN", {}, FldFloat }, { CONCAT, "CONCAT", "__gg__concat", "n", {}, FldAlphanumeric }, - { CONVERT, "CONVERT", - "__gg__convert", "XII", {}, FldAlphanumeric }, + { CONVERT, "CONVERT", + "__gg__convert", "XII", {}, FldAlphanumeric }, { COS, "COS", - "__gg__cos", "N", {}, FldNumericBin5 }, + "__gg__cos", "N", {}, FldFloat }, { CURRENT_DATE, "CURRENT-DATE", "__gg__current_date", "", {}, FldAlphanumeric }, { DATE_OF_INTEGER, "DATE-OF-INTEGER", @@ -94,7 +100,6 @@ static const function_descr_t function_descrs[] = { "__gg__display_of", "UUI", {}, FldAlphanumeric }, { E, "E", "__gg_e", "", {}, FldNumericBin5 }, - { EXCEPTION_FILE, "EXCEPTION-FILE", "__gg__func_exception_file", "", {}, FldAlphanumeric }, { EXCEPTION_FILE_N, "EXCEPTION-FILE-N", @@ -107,11 +112,10 @@ static const function_descr_t function_descrs[] = { "__gg__func_exception_statement", "", {}, FldAlphanumeric }, { EXCEPTION_STATUS, "EXCEPTION-STATUS", "__gg__func_exception_status", "", {}, FldAlphanumeric }, - { EXP, "EXP", - "__gg__exp", "N", {}, FldNumericBin5 }, + "__gg__exp", "N", {}, FldFloat }, { EXP10, "EXP10", - "__gg__exp10", "N", {}, FldNumericBin5 }, + "__gg__exp10", "N", {}, FldFloat }, { FACTORIAL, "FACTORIAL", "__gg__factorial", "I", {}, FldNumericBin5 }, { FIND_STRING, "FIND-STRING", @@ -123,20 +127,20 @@ static const function_descr_t function_descrs[] = { { FORMATTED_DATETIME, "FORMATTED-DATETIME", "__gg__formatted_datetime", "XINI", {}, FldAlphanumeric }, { FORMATTED_TIME, "FORMATTED-TIME", - "__gg__formatted_time", "INI", {}, FldNumericBin5 }, + "__gg__formatted_time", "INI", {}, FldAlphanumeric }, { FRACTION_PART, "FRACTION-PART", - "__gg__fraction_part", "N", {}, FldNumericBin5 }, + "__gg__fraction_part", "N", {}, FldFloat }, { HEX_OF, "HEX-OF", "__gg__hex_of", "X", {}, FldAlphanumeric }, { HEX_TO_CHAR, "HEX-TO-CHAR", "__gg__hex_to_char", "X", {}, FldAlphanumeric }, { HIGHEST_ALGEBRAIC, "HIGHEST-ALGEBRAIC", - "__gg__highest_algebraic", "N", {}, FldNumericBin5 }, + "__gg__highest_algebraic", "N", {}, FldInvalid }, { INTEGER, "INTEGER", "__gg__integer", "N", {}, FldNumericBin5 }, // requires FldBoolean - { INTEGER_OF_BOOLEAN, "INTEGER-OF-BOOLEAN", - "__gg__integer_of_boolean", "B", {}, FldNumericBin5 }, + { INTEGER_OF_BOOLEAN, "INTEGER-OF-BOOLEAN", + "__gg__integer_of_boolean", "B", {}, FldNumericBin5 }, { INTEGER_OF_DATE, "INTEGER-OF-DATE", "__gg__integer_of_date", "I", {}, FldNumericBin5 }, { INTEGER_OF_DAY, "INTEGER-OF-DAY", @@ -155,38 +159,36 @@ static const function_descr_t function_descrs[] = { "__gg__locale_time", "XX", {}, FldNumericBin5 }, { LOCALE_TIME_FROM_SECONDS, "LOCALE-TIME-FROM-SECONDS", "__gg__locale_time_from_seconds", "NX", {}, FldNumericBin5 }, - { LOG, "LOG", - "__gg__log", "N", {}, FldNumericBin5 }, + "__gg__log", "N", {}, FldFloat }, { LOG10, "LOG10", - "__gg__log10", "N", {}, FldNumericBin5 }, + "__gg__log10", "N", {}, FldFloat }, { LOWER_CASE, "LOWER-CASE", "__gg__lower_case", "X", {}, FldAlphanumeric }, { LOWEST_ALGEBRAIC, "LOWEST-ALGEBRAIC", - "__gg__lowest_algebraic", "N", {}, FldNumericBin5 }, - + "__gg__lowest_algebraic", "N", {}, FldInvalid }, { MAXX, "MAX", - "__gg__max", "n", {}, FldAlphanumeric }, + "__gg__max", "n", {}, FldInvalid }, { MEAN, "MEAN", - "__gg__mean", "n", {}, FldNumericBin5 }, + "__gg__mean", "n", {}, FldFloat }, { MEDIAN, "MEDIAN", - "__gg__median", "n", {}, FldNumericBin5 }, + "__gg__median", "n", {}, FldFloat }, { MIDRANGE, "MIDRANGE", - "__gg__midrange", "n", {}, FldNumericBin5 }, + "__gg__midrange", "n", {}, FldFloat }, { MINN, "MIN", - "__gg__min", "n", {}, FldAlphanumeric }, + "__gg__min", "n", {}, FldInvalid }, { MOD, "MOD", "__gg__mod", "IN", {}, FldNumericBin5 }, - { MODULE_NAME, "MODULE-NAME", - "__gg__module_name", "I", {}, FldAlphanumeric }, + { MODULE_NAME, "MODULE-NAME", + "__gg__module_name", "I", {}, FldAlphanumeric }, { NATIONAL_OF, "NATIONAL-OF", "__gg__national_of", "XX", {}, FldAlphanumeric }, { NUMVAL, "NUMVAL", - "__gg__numval", "X", {}, FldNumericBin5 }, + "__gg__numval", "X", {}, FldFloat }, { NUMVAL_C, "NUMVAL-C", - "__gg__numval_c", "XXU", {}, FldNumericBin5 }, + "__gg__numval_c", "XXU", {}, FldFloat }, { NUMVAL_F, "NUMVAL-F", - "__gg__numval_f", "X", {}, FldNumericBin5 }, + "__gg__numval_f", "X", {}, FldFloat }, { ORD, "ORD", "__gg__ord", "X", {}, FldNumericBin5 }, { ORD_MAX, "ORD-MAX", @@ -196,37 +198,37 @@ static const function_descr_t function_descrs[] = { { PI, "PI", "__gg__pi", "", {}, FldNumericBin5 }, { PRESENT_VALUE, "PRESENT-VALUE", - "__gg__present_value", "n", {}, FldNumericBin5 }, + "__gg__present_value", "n", {}, FldFloat }, { RANDOM, "RANDOM", - "__gg__random", "I", {}, FldNumericBin5 }, + "__gg__random", "I", {}, FldFloat }, { RANGE, "RANGE", - "__gg__range", "n", {}, FldNumericBin5 }, + "__gg__range", "n", {}, FldInvalid }, { REM, "REM", - "__gg__rem", "NN", {}, FldNumericBin5 }, + "__gg__rem", "NN", {}, FldFloat }, { REVERSE, "REVERSE", "__gg__reverse", "X", {}, FldAlphanumeric }, { SECONDS_FROM_FORMATTED_TIME, "SECONDS-FROM-FORMATTED-TIME", - "__gg__seconds_from_formatted_time", "XX", {}, FldAlphanumeric }, + "__gg__seconds_from_formatted_time", "XX", {}, FldFloat }, { SECONDS_PAST_MIDNIGHT, "SECONDS_PAST_MIDNIGHT", - "__gg__seconds_past_midnight", "", {}, FldAlphanumeric }, + "__gg__seconds_past_midnight", "", {}, FldFloat }, { SIGN, "SIGN", "__gg__sign", "N", {}, FldNumericBin5 }, { SIN, "SIN", - "__gg__sin", "N", {}, FldNumericBin5 }, - { SMALLEST_ALGEBRAIC, "SMALLEST-ALGEBRAIC", - "__gg__smallest_algebraic", "N", {}, FldNumericBin5 }, + "__gg__sin", "N", {}, FldFloat }, + { SMALLEST_ALGEBRAIC, "SMALLEST-ALGEBRAIC", + "__gg__smallest_algebraic", "N", {}, FldInvalid }, { SQRT, "SQRT", - "__gg__sqrt", "N", {}, FldNumericBin5 }, - { STANDARD_COMPARE, "STANDARD-COMPARE", - "__gg__standard_compare", "XXXI", {}, FldAlphanumeric }, + "__gg__sqrt", "N", {}, FldFloat }, + { STANDARD_COMPARE, "STANDARD-COMPARE", + "__gg__standard_compare", "XXXI", {}, FldAlphanumeric }, { STANDARD_DEVIATION, "STANDARD-DEVIATION", - "__gg__standard_deviation", "n", {}, FldNumericBin5 }, - { SUBSTITUTE, "SUBSTITUTE", - "__gg__substitute", "XXX", {}, FldAlphanumeric }, + "__gg__standard_deviation", "n", {}, FldFloat }, + { SUBSTITUTE, "SUBSTITUTE", + "__gg__substitute", "XXX", {}, FldAlphanumeric }, { SUM, "SUM", - "__gg__sum", "n", {}, FldNumericBin5 }, + "__gg__sum", "n", {}, FldInvalid }, { TAN, "TAN", - "__gg__tan", "N", {}, FldNumericBin5 }, + "__gg__tan", "N", {}, FldFloat }, { TEST_DATE_YYYYMMDD, "TEST-DATE-YYYYMMDD", "__gg__test_date_yyyymmdd", "I", {}, FldNumericBin5 }, { TEST_DAY_YYYYDDD, "TEST-DAY-YYYYDDD", @@ -258,7 +260,7 @@ static const function_descr_t function_descrs[] = { { UWIDTH, "UWIDTH", "__gg__uwidth", "XI", {}, FldAlphanumeric }, { VARIANCE, "VARIANCE", - "__gg__variance", "n", {}, FldNumericBin5 }, + "__gg__variance", "n", {}, FldFloat }, { WHEN_COMPILED, "WHEN-COMPILED", "__gg__when_compiled", "", {}, FldAlphanumeric }, { YEAR_TO_YYYY, "YEAR-TO-YYYY", @@ -322,6 +324,61 @@ intrinsic_return_type( int token ) { return p == function_descrs_end? FldAlphanumeric : p->ret_type; } +static cbl_field_t * +intrinsic_return_field(int token, std::vector<cbl_refer_t> args) + { + cbl_field_t *retval; + + cbl_field_type_t func_type = intrinsic_return_type(token); + switch(func_type) + { + case FldAlphanumeric: + retval = new_alphanumeric(); + break; + case FldNumericBin5: + retval = new_tempnumeric(); + break; + case FldFloat: + retval = new_tempnumeric_float(); + break; + case FldInvalid: + // This is a flag that a function takes the type of its first input + assert( args.size() ); + switch(args[0].field->type) + { + case FldGroup: + case FldAlphanumeric: + case FldAlphaEdited: + case FldLiteralA: + retval = new_alphanumeric(); + break; + case FldNumericBinary: + case FldPacked: + case FldNumericDisplay: + case FldNumericBin5: + case FldLiteralN: + case FldIndex: + case FldPointer: + retval = new_tempnumeric(); + break; + case FldFloat: + retval = new_tempnumeric_float(); + break; + default: + retval = NULL; + gcc_unreachable(); + break; + } + break; + default: + retval = NULL; + gcc_unreachable(); + break; + } + + return retval; + } + static const char * intrinsic_cname( int token ) { auto p = std::find_if( function_descrs, diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l index d5ec34a46857..2ffc025f332a 100644 --- a/gcc/cobol/scan.l +++ b/gcc/cobol/scan.l @@ -313,6 +313,59 @@ PROCEDURE{SPC}DIVISION { yy_push_state(procedure_div); } <INITIAL,procedure_div,cdf_state>{ + (IS{SPC})?"<" { return '<'; } + (IS{SPC})?"<=" { return LE; } + (IS{SPC})?"=" { static char eq[] = "="; + ydflval.string = yylval.string = eq; + return EQ; } + (IS{SPC})?"<>" { return NE; } + (IS{SPC})?">=" { return GE; } + (IS{SPC})?">" { return '>'; } + + {LESS_THAN} { return '<'; } + {LESS_THAN}{SPC}{OR_EQUAL}/[[:space:]] { return LE; } + (IS{SPC})?EQUALS?({SPC}TO)?/[[:space:]] { + static char eq[] = "EQUAL"; + ydflval.string = yylval.string = eq; + return EQ; } + {GREATER_THAN}{SPC}{OR_EQUAL}/[[:space:]] { return GE; } + {GREATER_THAN} { return '>'; } + + {ISNT}{OSPC}">=" { verify_ws(yytext[yyleng - 3]); return '<'; } + {ISNT}{OSPC}">" { verify_ws(yytext[yyleng - 2]); return LE; } + {ISNT}{OSPC}"=" { verify_ws(yytext[yyleng - 2]); return NE; } + {ISNT}{OSPC}"<" { verify_ws(yytext[yyleng - 2]); return GE; } + {ISNT}{OSPC}"<=" { verify_ws(yytext[yyleng - 3]); return '>'; } + + {ISNT}{SPC}GREATER{SPC}(THAN)?{SPC}{OR_EQUAL}/[[:space:]] { return '<'; } + {ISNT}{SPC}GREATER{SPC}(THAN)? { return LE; } + {ISNT}{SPC}EQUALS?{SPC}(TO)? { return NE; } + {ISNT}{SPC}LESS{SPC}(THAN)? { return GE; } + {ISNT}{SPC}LESS{SPC}(THAN)?{SPC}{OR_EQUAL}/[[:space:]] { return '>'; } + + [*]{2} { return POW; } + + /* + * "A boolean operator specifies the type of boolean operation to be performed + * on one or two operands, for a unary operator or binary operator, + * respectively." + * Binary boolean operators + * B-AND B-OR B-XOR + * Unary boolean operator + * B-NOT + * Boolean shift operators + * B-SHIFT-L B-SHIFT-LC B-SHIFT-R B-SHIFT-RC + */ + /**** +B-AND +B-OR +B-XOR +B-NOT +B-SHIFT-L +B-SHIFT-LC +B-SHIFT-R +B-SHIFT-RC + ****/ /* unused Context Words */ ARITHMETIC { return ARITHMETIC; } @@ -1089,6 +1142,7 @@ USE({SPC}FOR)? { return USE; } DEPENDING { return DEPENDING; } DESCENDING { return DESCENDING; } DISPLAY { return DISPLAY; } + EBCDIC { return EBCDIC; } EJECT{DOTEOL}? { dialect_ok(yylloc, IbmEjectE, "EJECT"); auto len = yyleng - 1; @@ -1107,6 +1161,7 @@ USE({SPC}FOR)? { return USE; } LEADING { return LEADING; } LEFT { return LEFT; } MODE { return MODE; } + NATIVE { return NATIVE; } NO { return NO; } OCCURS/{SPC}{NAME} { return OCCURS; } OCCURS { yy_push_state(integer_count); return OCCURS; } @@ -1127,6 +1182,7 @@ USE({SPC}FOR)? { return USE; } SIGN { return SIGN; } SIZE { return SIZE; } STANDARD { return STANDARD; } + STANDARD{SPC}ALPHABET { return STANDARD_ALPHABET; } STRONG { return STRONG; } SYNC(HRONIZED)? { return SYNCHRONIZED; } TIMES { return TIMES; } @@ -1506,62 +1562,6 @@ USE({SPC}FOR)? { return USE; } } } -<cdf_state,procedure_div>{ - (IS{SPC})?"<" { return '<'; } - (IS{SPC})?"<=" { return LE; } - (IS{SPC})?"=" { static char eq[] = "="; - ydflval.string = yylval.string = eq; - return EQ; } - (IS{SPC})?"<>" { return NE; } - (IS{SPC})?">=" { return GE; } - (IS{SPC})?">" { return '>'; } - - {LESS_THAN} { return '<'; } - {LESS_THAN}{SPC}{OR_EQUAL}/[[:space:]] { return LE; } - (IS{SPC})?EQUALS?({SPC}TO)?/[[:space:]] { - static char eq[] = "EQUAL"; - ydflval.string = yylval.string = eq; - return EQ; } - {GREATER_THAN}{SPC}{OR_EQUAL}/[[:space:]] { return GE; } - {GREATER_THAN} { return '>'; } - - {ISNT}{OSPC}">=" { verify_ws(yytext[yyleng - 3]); return '<'; } - {ISNT}{OSPC}">" { verify_ws(yytext[yyleng - 2]); return LE; } - {ISNT}{OSPC}"=" { verify_ws(yytext[yyleng - 2]); return NE; } - {ISNT}{OSPC}"<" { verify_ws(yytext[yyleng - 2]); return GE; } - {ISNT}{OSPC}"<=" { verify_ws(yytext[yyleng - 3]); return '>'; } - - {ISNT}{SPC}GREATER{SPC}(THAN)?{SPC}{OR_EQUAL}/[[:space:]] { return '<'; } - {ISNT}{SPC}GREATER{SPC}(THAN)? { return LE; } - {ISNT}{SPC}EQUALS?{SPC}(TO)? { return NE; } - {ISNT}{SPC}LESS{SPC}(THAN)? { return GE; } - {ISNT}{SPC}LESS{SPC}(THAN)?{SPC}{OR_EQUAL}/[[:space:]] { return '>'; } - - [*]{2} { return POW; } - - /* - * "A boolean operator specifies the type of boolean operation to be performed - * on one or two operands, for a unary operator or binary operator, - * respectively." - * Binary boolean operators - * B-AND B-OR B-XOR - * Unary boolean operator - * B-NOT - * Boolean shift operators - * B-SHIFT-L B-SHIFT-LC B-SHIFT-R B-SHIFT-RC - */ - -B-AND -B-OR -B-XOR -B-NOT -B-SHIFT-L -B-SHIFT-LC -B-SHIFT-R -B-SHIFT-RC - -} - <procedure_div>{ (ID|IDENTIFICATION|ENVIRONMENT|DATA|PROCEDURE){SPC}DIVISION { myless(0); BEGIN(INITIAL); } @@ -1591,8 +1591,16 @@ B-SHIFT-RC {ISNT}{SPC}NEGATIVE/[[:space:]] { yylval.number = NOT; return NEGATIVE; } {ISNT}{SPC}ZERO/[[:space:]] { yylval.number = NOT; return ZERO; } + {ISNT}{SPC}/OMITTED { return NOT; } + [(:)] { return *yytext; } [(]/[^(:)""'']*[:][^)]*[)] { return LPAREN; /* parentheses around a colon */ } + [(][^:""''\n]*[:][^)]*[)] { // does not match foo(bar)\n: :-( + int tok = is_refmod(yytext, yytext + yyleng)? + int(LPAREN) : '('; + myless(1); + return tok; + } FILLER { return FILLER_kw; } INVALID { yylval.number = INVALID; return INVALID; } @@ -2177,8 +2185,9 @@ BASIS { yy_push_state(basis); return BASIS; } {POP_FILE} { yy_set_bol(true); + yylineno = input_file_status.pending().lineno; input_file_status.leave(); - yylineno = cobol_lineno(); + dbgmsg("banged yylineno to %d", yylineno); } {LINE_DIRECTIVE} { diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h index 2f87b1a5d6d9..cd2798aa3ece 100644 --- a/gcc/cobol/scan_ante.h +++ b/gcc/cobol/scan_ante.h @@ -396,14 +396,33 @@ class enter_leave_t { } }; -static class input_file_status_t { +/* + * The lexer knows the immediate status of the input file and its line number + * from the PUSH, POP, and LINE directives. It saves yylineno whenever it + * encounters a PUSH, and updates it for a POP. + * + * The line number trickles into the parser by way of location. Only the + * parser knows what token it is parsing. As for the filename, the lexer + * queues enter/leave notices for the parser. + * + * Whenever the parser fetches a token, it gets the current line number from + * yylineno, and the current filename by depleting the notification queue, if + * any, and using the last one. + */ +class input_file_status_t { + public: + struct input_pos_t { int lineno; const char *filename; }; + private: std::queue <enter_leave_t> inputs; + std::stack<input_pos_t> positions; public: void enter(const char *filename) { inputs.push( enter_leave_t(parser_enter_file, filename) ); + positions.push( input_pos_t{ yylineno, filename } ); } void leave() { inputs.push( enter_leave_t(parser_leave_file) ); + positions.pop(); } void notify() { while( ! inputs.empty() ) { @@ -412,7 +431,10 @@ static class input_file_status_t { inputs.pop(); } } -} input_file_status; + input_pos_t pending() const { assert( ! positions.empty() ); return positions.top(); } +}; + +static input_file_status_t input_file_status; void input_file_status_notify() { input_file_status.notify(); } @@ -636,12 +658,11 @@ binary_integer_usage( const char name[]) { } static void -verify_ws( const YYLTYPE& loc, const char [] /* input[] */, char ch ) { +verify_ws( char ch ) { if( ! fisspace(ch) ) { - dialect_ok(loc, LexSeparatorE, "missing separator space"); + dialect_ok(yylloc, LexSeparatorE, "missing separator space"); } } -#define verify_ws(C) verify_ws(yylloc, yytext, C) int binary_integer_usage_of( const char name[] ) { @@ -1253,8 +1274,60 @@ integer_of( const char input[], bool is_hex = false) { return output; } - - - - - +/* + * Loosely parse what might be a refmod expression. This is used to decide + * whether to indicate a refmod to the parser with an LPAREN token, or not, + * with a '(' token. The input is known to have a first line that begins with + * '('., includes ':', and ends with ')'. + */ +static bool +is_refmod( const char input[], const char enput[] ) { + if( input == enput ) return false; + + switch(*input) { + case '(': + input = std::find( ++input, enput, ')'); + if( input == enput ) return false; + return is_refmod(++input, enput); + case ':': + return is_refmod(++input, enput); + case ')': + if( ++input == enput ) return true; + return is_refmod(input, enput); + default: + if( ISSPACE(*input) ) { + input = std::find_if( ++input, enput, + []( char ch ) { + return ! ISSPACE(ch); + } ); + return is_refmod(input, enput); + } + break; + } + input = std::find_if( input, enput, + [start = *input]( char ch ) { + bool yes = false; + if( ISDIGIT(start) ) { + switch(ch) { + case '+': case '-': case '*': case '/': + yes = true; break; + case '.': case ',': + yes = true; break; + default: + yes = ISDIGIT(ch); + break; + } + } else { + assert(ISALNUM(start)); + switch(ch) { + case '-': + yes = true; break; + default: + yes = ISALNUM(ch); + break; + } + } + return !yes; + } ); + return is_refmod(input, enput); +} diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc index 6229365284d7..99d638e8c169 100644 --- a/gcc/cobol/symbols.cc +++ b/gcc/cobol/symbols.cc @@ -1868,7 +1868,12 @@ symbols_update( size_t first, bool parsed_ok ) { return 0; } } - // Better to report an error than to fail mysteriously with "0 errors". + /* + * The parser sets an incomplete field with 0 capacity to FldInvalid. If + * the field proves to be a group symbol_field_add() sets it to FldGroup + * and its size is calculated above. If that doesn't happen, it gets + * flagged here. + */ if( yydebug || parse_error_count() == 0 ) { if( field->type == FldInvalid ) { ERROR_FIELD(field, "line %d: %s %s requires PICTURE", @@ -3456,8 +3461,47 @@ cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) { error_msg(loc, "ALSO value %zu is unknown", ch); } -using std::deque; -static deque<cbl_field_t*> stack; +static symbol_temporaries_t program_temporaries; + +/* + * Supply a reference to the current list of temporaries for use by codegen to free + * the memory if it decides to return to the caller. + */ +symbol_temporaries_t& +symbol_temporaries() { + return program_temporaries; +} + +symbol_temporaries_t +symbol_temporary_alphanumerics() { + symbol_temporaries_t output; + std::copy_if( program_temporaries.begin(), + program_temporaries.end(), + std::back_inserter(output), + []( auto f ) { + switch(f->type) { + case FldAlphaEdited: + case FldAlphanumeric: + return f->has_attr(intermediate_e); + case FldFloat: + case FldNumericBin5: + case FldNumericBinary: + case FldNumericDisplay: + case FldNumericEdited: + case FldPacked: + default: + break; + } + return false; + } ); + for( cbl_field_t *f : output ) { + auto p = std::find( program_temporaries.begin(), + program_temporaries.end(), + f ); + program_temporaries.erase(p); + } + return output; +} /* * Allocate a temporary field. Assign the type and name, if supplied. Caller @@ -3469,8 +3513,8 @@ new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr extern int yylineno; static const struct cbl_field_t empty_alpha = { FldAlphanumeric, intermediate_e, - {MAXIMUM_ALPHA_LENGTH, - MAXIMUM_ALPHA_LENGTH, 0, 0, NULL} }; + {0, + 0, 0, 0, NULL} }; static const struct cbl_field_t empty_float = { FldFloat, intermediate_e, {16, 16, 32, 0, NULL} }; @@ -3533,6 +3577,8 @@ new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr f->codeset.set(); + program_temporaries.push_back(f); + return f; } @@ -3735,9 +3781,8 @@ symbol_temporaries_free() { } cbl_field_t * -new_alphanumeric( size_t capacity, const cbl_name_t name, cbl_encoding_t encoding ) { +new_alphanumeric( const cbl_name_t name, cbl_encoding_t encoding ) { cbl_field_t * field = new_temporary_impl(FldAlphanumeric, name); - field->set_capacity( capacity ); if( encoding != no_encoding_e ) { field->codeset.set(encoding); } diff --git a/gcc/cobol/symbols.h b/gcc/cobol/symbols.h index 398be73f0e14..87409857afc0 100644 --- a/gcc/cobol/symbols.h +++ b/gcc/cobol/symbols.h @@ -74,7 +74,7 @@ cbl_dialect_str(cbl_dialect_t dialect) { } return "???"; -}; +} // Dialects may be combined. extern unsigned int cbl_dialects; @@ -248,11 +248,15 @@ enum symbol_type_t { SymLocale, }; -// The ISO specification says alphanumeric literals have a maximum length of -// 8,191 characters. It seems to be silent on the length of alphanumeric data -// items. Our implementation requires a maximum length, so we chose to make it -// the same. -#define MAXIMUM_ALPHA_LENGTH 8192 +// From Enterprise COBOL for z/OS 6.4 Language Reference, Appendix B. +// ISO specifies no limit in 13.18.40.3 Syntax rules. +// CobolCraft sometimes needs 2,100,000 or about 2 MB. +#ifdef COBOL_MAXIMUM_ALPHA_LENGTH +# define MAXIMUM_ALPHA_LENGTH size_t(COBOL_MAXIMUM_ALPHA_LENGTH) +#else +# define IBM_MAXIMUM_ALPHA_LENGTH (size_t(1) << 31) +# define MAXIMUM_ALPHA_LENGTH IBM_MAXIMUM_ALPHA_LENGTH +#endif class cbl_field_data_t { uint32_t nbyte; // allocated space @@ -933,6 +937,11 @@ struct cbl_field_t { const cbl_field_t * cbl_figconst_field_of( const char *value ); +typedef std::list<cbl_field_t*> symbol_temporaries_t; + +symbol_temporaries_t& symbol_temporaries(); +symbol_temporaries_t symbol_temporary_alphanumerics(); + // Necessary forward referencea struct cbl_label_t; struct cbl_refer_t; @@ -1658,8 +1667,9 @@ struct function_descr_t { char cname[48]; char types[8]; std::vector<function_descr_arg_t> linkage_fields; - cbl_field_type_t ret_type; - + cbl_field_type_t ret_type; // When the ret_type is FldInvalid, that + // indicates the function takes on the type of + // the first argument. static function_descr_t init( const char name[] ) { function_descr_t descr = {}; if( -1 == snprintf( descr.name, sizeof(descr.name), "%s", name ) ) { @@ -3081,4 +3091,7 @@ void current_enabled_ecs( tree ena ); bool validate_numeric_edited(cbl_field_t *field); +cbl_field_t *new_alphanumeric(const cbl_name_t name=nullptr, + cbl_encoding_t encoding=no_encoding_e ); + #endif diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc index d9978e42da14..076bcf89a059 100644 --- a/gcc/cobol/util.cc +++ b/gcc/cobol/util.cc @@ -143,17 +143,16 @@ class cdf_directives_t { template <typename T> class cdf_stack_t : private std::stack<T> { // cppcheck-suppress noConstructor - T default_value; + T current_value; const T& top() const { return std::stack<T>::top(); } bool empty() const { return std::stack<T>::empty(); } public: void value( const T& value ) { - T& output( empty()? default_value : std::stack<T>::top() ); // cppcheck-suppress constVariableReference - output = value; - dbgmsg("cdf_directives_t::%s: %s", __func__, str(output).c_str()); + current_value = value; + dbgmsg("cdf_directives_t::%s: %s", __func__, str(current_value).c_str()); } T& value() { - return empty()? default_value : std::stack<T>::top(); + return current_value; } void push() { std::stack<T>::push(value()); @@ -164,9 +163,9 @@ class cdf_directives_t error_msg(YYLTYPE(), "CDF stack empty"); // cppcheck-suppress syntaxError return; } - default_value = top(); + current_value = top(); std::stack<T>::pop(); - dbgmsg("cdf_directives_t::%s: %s", __func__, str(default_value).c_str()); + dbgmsg("cdf_directives_t::%s: %s", __func__, str(current_value).c_str()); } protected: static std::string str(cbl_call_convention_t arg) { @@ -237,6 +236,8 @@ cdf_dictionary() { void cobol_set_indicator_column( int column ) { cdf_directives.source_format.value().indicator_column_set(column); + dbgmsg("%s: format now %s", __func__, + cdf_directives.source_format.value().description()); } source_format_t& cdf_source_format() { return cdf_directives.source_format.value(); @@ -252,14 +253,22 @@ void cdf_push_call_convention() { cdf_directives.call_convention.push(); } void cdf_push_current_tokens() { cdf_directives.cobol_words.push(); } void cdf_push_dictionary() { cdf_directives.dictionary.push(); } void cdf_push_enabled_exceptions() { cdf_directives.enabled_exceptions.push(); } -void cdf_push_source_format() { cdf_directives.source_format.push(); } +void cdf_push_source_format() { + cdf_directives.source_format.push(); + dbgmsg("%s: format still %s", __func__, + cdf_directives.source_format.value().description()); +} void cdf_pop() { cdf_directives.pop(); } void cdf_pop_call_convention() { cdf_directives.call_convention.pop(); } void cdf_pop_current_tokens() { cdf_directives.cobol_words.pop(); } void cdf_pop_dictionary() { cdf_directives.dictionary.pop(); } void cdf_pop_enabled_exceptions() { cdf_directives.enabled_exceptions.pop(); } -void cdf_pop_source_format() { cdf_directives.source_format.pop(); } +void cdf_pop_source_format() { + cdf_directives.source_format.pop(); + dbgmsg("%s: format now %s", __func__, + cdf_directives.source_format.value().description()); +} /* * Construct a cbl_field_t from a CDF literal, to be installed in the symbol table. @@ -2643,16 +2652,11 @@ parent_names( const symbol_elem_t *elem, if( is_filler(cbl_field_of(elem)) ) return; - // dbgmsg("%s: asked about %s of %s (" HOST_SIZE_T_PRINT_UNSIGNED " away)", __func__, - // cbl_field_of(elem)->name, - // cbl_field_of(group)->name, (fmt_size_t)(elem - group)); - for( const symbol_elem_t *e=elem; e && group < e; e = symbol_parent(e) ) { names.push_front( cbl_field_of(e)->name ); } } -extern int yylineno; class find_corresponding { public: enum type_t { arith_op, move_op }; @@ -2966,6 +2970,7 @@ bool cobol_filename( const char *name, ino_t inode ) { linemap_add(line_table, LC_ENTER, sysp, name, 1); input_filename_vestige = name; bool pushed = input_filenames.push( input_file_t(name, inode, 1) ); + dbgmsg("%s: %s %s", __func__, pushed? "pushed" : "set to", name); return pushed; } @@ -2974,6 +2979,8 @@ cobol_lineno( int lineno ) { if( input_filenames.empty() ) return NULL; auto& input( input_filenames.top() ); input.lineno = lineno; + dbgmsg("%s:%d: saved %s, line %d", __func__, __LINE__, + input.name, input.lineno); return input.name; } @@ -2992,6 +2999,8 @@ cobol_lineno() { if( input_filenames.empty() ) return 0; size_t n = input_filenames.size() < 2? 0 : 1; const auto& input( input_filenames.peek(n) ); + dbgmsg("%s:%d: fetch %s, line %d", __func__, __LINE__, + input.name, input.lineno); return input.lineno; } @@ -3008,10 +3017,9 @@ cobol_filename_restore() { old_filenames[top.name] = top.inode; input_filename_vestige = top.name; - input_filenames.pop(); - if( input_filenames.empty() ) return; + dbgmsg("%s: LEAVE %s", __func__, top.name); - const auto& input = input_filenames.top(); + input_filenames.pop(); linemap_add(line_table, LC_LEAVE, sysp, NULL, 0); } diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc index 680db5a52899..d3206b89679c 100644 --- a/libgcobol/intrinsic.cc +++ b/libgcobol/intrinsic.cc @@ -236,6 +236,9 @@ void string_to_dest(cblc_field_t *dest, const char *psz) { charmap_t *charmap = __gg__get_charmap(dest->encoding); + + __gg__adjust_dest_size(dest, charmap->strlen(psz)); + size_t dest_length = dest->capacity; size_t source_length = charmap->strlen(psz); size_t length = std::min(dest_length, source_length); @@ -1211,6 +1214,8 @@ __gg__char( cblc_field_t *dest, int converted_char = 0; memcpy(&converted_char, converted, charmap_dest->stride()); // Space fill the dest: + + __gg__adjust_dest_size(dest, charmap_dest->stride()); charmap_dest-> memset(dest->data, charmap_dest->mapped_character(ascii_space), dest->capacity); @@ -1307,8 +1312,8 @@ __gg__current_date(cblc_field_t *dest) retval, strlen(retval), &bytes_converted); - __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted); __gg__adjust_dest_size(dest, bytes_converted); + __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted); free(converted); } @@ -1556,15 +1561,17 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string cbl_char_t format_Z = charmap_from->mapped_character(ascii_Z); cbl_char_t format_z = charmap_from->mapped_character(ascii_z); + // Establish the formatting string: + const char *format = PTRCAST(char, (input->data+input_offset)); + const char *format_end = format + input_size; + + __gg__adjust_dest_size(dest, format_end-format); + // Establish the destination, and set it to spaces char *d = PTRCAST(char, dest->data); const char *dend = d + dest->capacity; charmap_to->memset(d, dest_space, dest->capacity); - // Establish the formatting string: - const char *format = PTRCAST(char, (input->data+input_offset)); - const char *format_end = format + input_size; - bool is_zulu = false; const char *p = format; while( p < format_end ) @@ -1627,6 +1634,12 @@ __gg__formatted_date(cblc_field_t *dest, // Destination string charmap_t *charmap_to = __gg__get_charmap(to); charmap_t *charmap_from = __gg__get_charmap(from); + // Establish the formatting string: + char *format = PTRCAST(char, (arg1->data+arg1_offset)); + const char *format_end = format + arg1_size; + + __gg__adjust_dest_size(dest, format_end-format); + cbl_char_t dest_space = charmap_to->mapped_character(ascii_space); // Establish the destination, and set it to spaces @@ -1634,9 +1647,6 @@ __gg__formatted_date(cblc_field_t *dest, // Destination string const char *dend = d + dest->capacity; charmap_to->memset(d, dest_space, dest->capacity); - // Establish the formatting string: - char *format = PTRCAST(char, (arg1->data+arg1_offset)); - const char *format_end = format + arg1_size; struct cobol_tm ctm = {}; @@ -1651,7 +1661,6 @@ __gg__formatted_date(cblc_field_t *dest, // Destination string else { ftime_replace(d, dend, format, format_end, charmap_from, achftime); - __gg__adjust_dest_size(dest, format_end-format); } } @@ -1679,17 +1688,20 @@ __gg__formatted_datetime( cblc_field_t *dest, // Destination string charmap_t *charmap_from = __gg__get_charmap(from); charmap_t *charmap_to = __gg__get_charmap(to); - // Establish the destination, and set it to spaces - char *d = PTRCAST(char, (dest->data)); - const char *dend = d + dest->capacity; - memset(d, charmap_from->mapped_character(ascii_space), dest->capacity); - // Establish the formatting string: char *format = PTRCAST(char, (par1->data+par1_o)); char *format_end = format + par1_s; trim_trailing_spaces(format, format_end, charmap_from->mapped_character(ascii_space)); bool is_zulu = is_zulu_format(format, format_end, charmap_from); + __gg__adjust_dest_size(dest, format_end-format); + + // Establish the destination, and set it to spaces + char *d = PTRCAST(char, (dest->data)); + const char *dend = d + dest->capacity; + memset(d, charmap_from->mapped_character(ascii_space), dest->capacity); + + struct cobol_tm ctm = {}; populate_ctm_from_date(ctm, par2, par2_o, par2_s); @@ -1711,7 +1723,6 @@ __gg__formatted_datetime( cblc_field_t *dest, // Destination string else { ftime_replace(d, dend, format, format_end, charmap_from, achftime); - __gg__adjust_dest_size(dest, format_end-format); } } @@ -1738,11 +1749,6 @@ __gg__formatted_time( cblc_field_t *dest,// Destination string int dest_space = charmap_to->mapped_character(ascii_space); - // Establish the destination, and set it to spaces - char *d = PTRCAST(char, dest->data); - const char *dend = d + dest->capacity; - charmap_to->memset(d, dest_space, dest->capacity); - // Establish the formatting string: char *format = PTRCAST(char, (par1->data+par1_o)); char *format_end = format + par1_s; @@ -1751,6 +1757,14 @@ __gg__formatted_time( cblc_field_t *dest,// Destination string charmap_from->mapped_character(ascii_space)); bool is_zulu = is_zulu_format(format, format_end, charmap_from); + __gg__adjust_dest_size(dest, format_end-format); + + // Establish the destination, and set it to spaces + char *d = PTRCAST(char, dest->data); + const char *dend = d + dest->capacity; + charmap_to->memset(d, dest_space, dest->capacity); + + struct cobol_tm ctm = {}; populate_ctm_from_time( ctm, par2, @@ -1773,8 +1787,8 @@ __gg__formatted_time( cblc_field_t *dest,// Destination string } else { - ftime_replace(d, dend, format, format_end, charmap_from, achftime); __gg__adjust_dest_size(dest, format_end-format); + ftime_replace(d, dend, format, format_end, charmap_from, achftime); } } @@ -2135,13 +2149,13 @@ change_case( cblc_field_t *dest, free(duped); char *duped2 = static_cast<char *>(__gg__memdup(converted, converted_bytes)); + __gg__adjust_dest_size(dest, converted_bytes); __gg__field_from_string(dest, 0, dest->capacity, duped2, converted_bytes); free(duped2); - __gg__adjust_dest_size(dest, converted_bytes); } @@ -3598,6 +3612,7 @@ __gg__trim( cblc_field_t *dest, __gg__adjust_dest_size(dest, ncount); memmove(dest->data, left, ncount); + free(copy); } #if HAVE_INITSTATE_R && HAVE_SRANDOM_R && HAVE_RANDOM_R @@ -3698,7 +3713,6 @@ __gg__reverse(cblc_field_t *dest, charmap_t *charmap = __gg__get_charmap(to); size_t stride = charmap->stride(); - size_t dest_length = dest->capacity; // Convert the input to the destination encoding size_t bytes_converted; @@ -3710,6 +3724,8 @@ __gg__reverse(cblc_field_t *dest, // copy over characters from the end of the copy to the beginning of dest: size_t i_from = bytes_converted - stride; size_t i_to = 0; + __gg__adjust_dest_size(dest, bytes_converted); + size_t dest_length = dest->capacity; while( i_from < bytes_converted && i_to < dest_length ) { cbl_char_t ch = charmap->getch(converted, i_from); @@ -3717,7 +3733,6 @@ __gg__reverse(cblc_field_t *dest, i_from -= stride; i_to += stride; } - __gg__adjust_dest_size(dest, i_to); } extern "C" @@ -5682,7 +5697,10 @@ __gg__locale_compare( cblc_field_t *dest, } else { - // Default locale + // This code just isn't right. ISO says they can be of different classes; + // we are assuming they are the same class. We need to detect if one is + // national and the other alphanumeric/display, and convert the + // alphanumeric string to national before comparing. achretval[0] = '='; size_t length = std::min(arg1_s, arg2_s); for(size_t i=0; i<length; i++ ) @@ -5711,11 +5729,14 @@ __gg__locale_compare( cblc_field_t *dest, } } - __gg__convert_encoding(achretval, - DEFAULT_SOURCE_ENCODING, - dest->encoding); - memcpy(dest->data, achretval, strlen(achretval)); - __gg__adjust_dest_size(dest, strlen(achretval)); + size_t nbytes; + const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING, + dest->encoding, + achretval, + strlen(achretval), + &nbytes); + __gg__adjust_dest_size(dest, nbytes); + memcpy(dest->data, converted, nbytes); } extern "C" @@ -5754,8 +5775,8 @@ __gg__locale_date(cblc_field_t *dest, ach, strlen(ach), &bytes_converted); - __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted); __gg__adjust_dest_size(dest, bytes_converted); + __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted); free(converted); } @@ -5796,8 +5817,8 @@ __gg__locale_time(cblc_field_t *dest, ach, strlen(ach), &bytes_converted); - __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted); __gg__adjust_dest_size(dest, bytes_converted); + __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted); free(converted); } @@ -5839,7 +5860,7 @@ __gg__locale_time_from_seconds( cblc_field_t *dest, ach, strlen(ach), &bytes_converted); - __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted); __gg__adjust_dest_size(dest, bytes_converted); + __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted); free(converted); } diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc index 81accd93f71f..3eca7787ee1c 100644 --- a/libgcobol/libgcobol.cc +++ b/libgcobol/libgcobol.cc @@ -3495,6 +3495,37 @@ format_for_display_internal(char **dest, index += source_rdigits; } (*dest)[index++] = NULLCH ; + if( var->attr & intermediate_e ) + { + if( value == 0 ) + { + strcpy(*dest, "0"); + } + else + { + // An intermediate is a rubber-band variable. It has no formal format. + // So, to make it cleaner for display purposes, let's clear off leading + // '+' characters and trailing zeroes. + if( **dest == ascii_plus ) + { + memmove(*dest, (*dest)+1, strlen(*dest)); + } + if( strchr(*dest, charmap->decimal_point()) ) + { + // There is a decimal point. Strip off trailing zeros: + char *p = *dest + strlen(*dest)-1; + while( *p == ascii_zero ) + { + *p-- = '\0'; + } + // And if we are left with just a decimal point, strip that off, too. + while( *p == charmap->decimal_point() ) + { + *p = '\0'; + } + } + } + } } break; @@ -5261,14 +5292,6 @@ init_var_both(cblc_field_t *var, char defaultbyte = flag_bits & DEFAULT_BYTE_MASK; unsigned int nsubscripts = (flag_bits & NSUBSCRIPT_MASK) >> NSUBSCRIPT_SHIFT; - if( var->data == NULL - && var->attr & (intermediate_e) - && var->type != FldLiteralA - && var->type != FldLiteralN ) - { - var->data = static_cast<unsigned char *>(malloc(var->capacity)); - } - // Set the "initialized" bit, which is tested in parser_symbol_add to make // sure this code gets executed only once. //fprintf(stderr, "__gg__initialize_variable %s setting initialize_e\n", var->name); @@ -11219,8 +11242,8 @@ __gg__get_argc(cblc_field_t *dest, size_t offset, size_t length) ach, strlen(ach), &nbytes ); - __gg__field_from_string(dest, offset, length, converted, nbytes); __gg__adjust_dest_size(dest, nbytes); + __gg__field_from_string(dest, offset, length, converted, nbytes); free(converted); } @@ -11262,8 +11285,8 @@ __gg__get_argv( cblc_field_t *dest, stashed_argv[N], strlen(stashed_argv[N]), &nbytes ); - __gg__field_from_string(dest, dest_offset, dest_length, converted, nbytes); __gg__adjust_dest_size(dest, nbytes); + __gg__field_from_string(dest, dest_offset, dest_length, converted, nbytes); free(converted); retcode = 0; // Okay } @@ -11307,8 +11330,8 @@ __gg__get_command_line( cblc_field_t *field, retval, strlen(retval), &nbytes ); - __gg__field_from_string(field, offset, flength, converted, nbytes); __gg__adjust_dest_size(field, nbytes); + __gg__field_from_string(field, offset, flength, converted, nbytes); free(converted); retcode = 0; // Okay } @@ -12588,16 +12611,16 @@ extern "C" void __gg__adjust_dest_size(cblc_field_t *dest, size_t ncount) { - if( dest->attr & (intermediate_e) ) + if( dest->attr & intermediate_e ) { - if( dest->allocated < ncount ) + // Make sure at least one byte is allocated; some routines get upset when + // dest->data is NULL even when dest->capacity is zero. + size_t alloc_size = std::max(1UL, ncount); + if( dest->allocated < alloc_size ) { - fprintf(stderr, "libgcobol.cc:__gg__adjust_dest_size(): " - "Adjusting %s size upward is not possible.\n", - dest->name); - abort(); -// dest->allocated = ncount; -// dest->data = (unsigned char *)realloc(dest->data, ncount); + dest->allocated = alloc_size; + free(dest->data); + dest->data = static_cast<unsigned char *>(malloc(alloc_size)); } dest->capacity = ncount; } @@ -12614,12 +12637,12 @@ __gg__adjust_encoding(cblc_field_t *field) PTRCAST(char, field->data), field->capacity, &nbytes); + __gg__adjust_dest_size(field, nbytes); size_t tocopy = std::min(nbytes, field->allocated); field->capacity = tocopy; memcpy(field->data, converted, tocopy); } - extern "C" void __gg__func_exception_location(cblc_field_t *dest) @@ -13970,7 +13993,7 @@ __gg__module_name(cblc_field_t *dest, module_type_t type) break; } - __gg__adjust_dest_size(dest, strlen(result)); + __gg__adjust_dest_size(dest, strlen(result)+1); memcpy(dest->data, result, strlen(result)+1); __gg__adjust_encoding(dest); } @@ -14313,9 +14336,19 @@ __gg__refer_from_string(cblc_field_t *field, { // 'string' has to be in the 'field' encoding. Use this when the input // might, or might not, be nul-terminated, and you don't want a - // nul-terminator in the data of the target field. + // nul-terminator in the data of the target field. For intermediates, the + // string must be nul-terminated charmap_t *charmap = __gg__get_charmap(field->encoding); + if( field->attr & intermediate_e ) + { + field_size = SSIZE_MAX; + } size_t nbytes = charmap->strlen(string, field_size); + if( field->attr & intermediate_e ) + { + __gg__adjust_dest_size(field, nbytes); + field_size = nbytes; + } __gg__field_from_string(field, field_offset, field_size, string, nbytes); } @@ -14326,15 +14359,20 @@ __gg__refer_from_psz(cblc_field_t *field, size_t field_size, const char *string) { - // 'string' has to be in the 'field' encoding. Use this when the input - // might, or might not, be nul-terminated, and you *do* want a - // nul-terminator in the data of the target field if there was one in the - // input. - - // One typical use is processing returned values from external C-style - // functions, which often return nul-terminated strings. + // 'string' has to be in the 'field' encoding. If the target is intermediate, + // It has to be nul-terminated in the field's encoding. charmap_t *charmap = __gg__get_charmap(field->encoding); + + if( field->attr & intermediate_e ) + { + field_size = SSIZE_MAX; + } size_t nbytes = charmap->strlen(string, field_size); + if( field->attr & intermediate_e ) + { + __gg__adjust_dest_size(field, nbytes); + field_size = nbytes; + } __gg__field_from_string(field, field_offset, field_size, @@ -14526,6 +14564,7 @@ __gg__convert(cblc_field_t *dest, // destination encoding: size_t i = 0; size_t d = 0; + __gg__adjust_dest_size(dest, 2*nbytes); while(i < nbytes && d < dest->capacity ) { cbl_char_t byte = charmap_tgt->getch(converted, &i); @@ -14537,7 +14576,6 @@ __gg__convert(cblc_field_t *dest, charmap_dest->putch(charmap_dest->mapped_character(lo), dest->data, &d); } free(converted); - __gg__adjust_dest_size(dest, d); } else if( dest_format == convert_byte_e ) { @@ -14550,6 +14588,7 @@ __gg__convert(cblc_field_t *dest, &nbytes); size_t i = 0; size_t d = 0; + __gg__adjust_dest_size(dest, 4*nbytes); while(i < nbytes && d < dest->capacity ) { // Each character is part of a string of hexadecimal digits. So, the @@ -14587,7 +14626,6 @@ __gg__convert(cblc_field_t *dest, } } free(converted); - __gg__adjust_dest_size(dest, d); } else { @@ -14597,10 +14635,10 @@ __gg__convert(cblc_field_t *dest, input_o, input_s, &nbytes); + __gg__adjust_dest_size(dest, nbytes); size_t len = std::min(nbytes, dest->capacity); memcpy(dest->data, converted, len); free(converted); - __gg__adjust_dest_size(dest, len); } } diff --git a/libgcobol/posix/shim/lseek.cc b/libgcobol/posix/shim/lseek.cc index 52407ed1df7f..4e2ec17d80a6 100644 --- a/libgcobol/posix/shim/lseek.cc +++ b/libgcobol/posix/shim/lseek.cc @@ -8,22 +8,22 @@ extern "C" { -off_t +off_t posix_lseek(int fd, off_t offset, int whence) { static const std::map<int, int> whences { { 2, SEEK_SET }, { 4, SEEK_CUR }, { 8, SEEK_END }, - }; + }; - /* - * Map valid input whence value onto C standard library value. - * Invalid values are passed through and rejected by lseek(2) per its documentation. + /* + * Map valid input whence value onto C standard library value. + * Invalid values are passed through and rejected by lseek(2) per its documentation. * (The caller always needs to check for errors anyway.) */ auto p = whences.find(whence); - if( p != whences.end() ) whence = p.second; + if( p != whences.end() ) whence = p->second; return lseek(fd, offset, whence); } diff --git a/libgcobol/posix/shim/open.cc b/libgcobol/posix/shim/open.cc index 7207ac951bbe..561d2d3f715c 100644 --- a/libgcobol/posix/shim/open.cc +++ b/libgcobol/posix/shim/open.cc @@ -16,55 +16,55 @@ extern "C" { #include "stat.h" int -posix_opent(const char *pathname, int cbl_flags, int cbl_mode) { +posix_open(const char *pathname, int cbl_flags, int cbl_mode) { static const std::map<int, int> flag_bits { { cbl::PSX_O_RDONLY, O_RDONLY }, { cbl::PSX_O_WRONLY, O_WRONLY }, - { cbl::PSX_O_RDWR, O_RDWR }, + { cbl::PSX_O_RDWR, O_RDWR }, { cbl::PSX_O_CREAT, O_CREAT }, - { cbl::PSX_O_EXCL, O_EXCL }, - { cbl::PSX_O_NOCTTY, O_NOCTTY }, + { cbl::PSX_O_EXCL, O_EXCL }, + { cbl::PSX_O_NOCTTY, O_NOCTTY }, { cbl::PSX_O_TRUNC, O_TRUNC }, - { cbl::PSX_O_APPEND, O_APPEND }, + { cbl::PSX_O_APPEND, O_APPEND }, { cbl::PSX_O_NONBLOCK, O_NONBLOCK }, { cbl::PSX_O_DSYNC, O_DSYNC }, - { cbl::PSX_O_DIRECT, O_DIRECT }, - { cbl::PSX_O_LARGEFILE, O_LARGEFILE }, - { cbl::PSX_O_DIRECTORY, O_DIRECTORY }, - { cbl::PSX_O_NOFOLLOW, O_NOFOLLOW }, - { cbl::PSX_O_NOATIME, O_NOATIME }, - { cbl::PSX_O_CLOEXEC, O_CLOEXEC }, - { cbl::PSX_O_SYNC, O_SYNC }, - { cbl::PSX_O_PATH, O_PATH }, - { cbl::PSX_O_TMPFILE, O_TMPFILE }, - }; + { cbl::PSX_O_DIRECT, O_DIRECT }, + { cbl::PSX_O_LARGEFILE, O_LARGEFILE }, + { cbl::PSX_O_DIRECTORY, O_DIRECTORY }, + { cbl::PSX_O_NOFOLLOW, O_NOFOLLOW }, + { cbl::PSX_O_NOATIME, O_NOATIME }, + { cbl::PSX_O_CLOEXEC, O_CLOEXEC }, + { cbl::PSX_O_SYNC, O_SYNC }, + { cbl::PSX_O_PATH, O_PATH }, + { cbl::PSX_O_TMPFILE, O_TMPFILE }, + }; static const std::map<int, int> mode_bits { - { cbl::PSX_S_IXOTH, S_IXOTH }, - { cbl::PSX_S_IWOTH, S_IWOTH }, - { cbl::PSX_S_IROTH, S_IROTH }, - { cbl::PSX_S_IRWXO, S_IRWXO }, - { cbl::PSX_S_IXGRP, S_IXGRP }, - { cbl::PSX_S_IWGRP, S_IWGRP }, - { cbl::PSX_S_IRGRP, S_IRGRP }, - { cbl::PSX_S_IRWXG, S_IRWXG }, - { cbl::PSX_S_IXUSR, S_IXUSR }, - { cbl::PSX_S_IWUSR, S_IWUSR }, - { cbl::PSX_S_IRUSR, S_IRUSR }, - { cbl::PSX_S_IRWXU, S_IRWXU }, - { cbl::PSX_S_ISVTX, S_ISVTX }, - { cbl::PSX_S_ISGID, S_ISGID }, - { cbl::PSX_S_ISUID, S_ISUID }, + { cbl::PSX_S_IXOTH, S_IXOTH }, + { cbl::PSX_S_IWOTH, S_IWOTH }, + { cbl::PSX_S_IROTH, S_IROTH }, + { cbl::PSX_S_IRWXO, S_IRWXO }, + { cbl::PSX_S_IXGRP, S_IXGRP }, + { cbl::PSX_S_IWGRP, S_IWGRP }, + { cbl::PSX_S_IRGRP, S_IRGRP }, + { cbl::PSX_S_IRWXG, S_IRWXG }, + { cbl::PSX_S_IXUSR, S_IXUSR }, + { cbl::PSX_S_IWUSR, S_IWUSR }, + { cbl::PSX_S_IRUSR, S_IRUSR }, + { cbl::PSX_S_IRWXU, S_IRWXU }, + { cbl::PSX_S_ISVTX, S_ISVTX }, + { cbl::PSX_S_ISGID, S_ISGID }, + { cbl::PSX_S_ISUID, S_ISUID }, }; - + int flags = 0; mode_t mode = 0; - + for( auto elem : flag_bits ) { int cbl_bit = elem.first; int std_bit = elem.second; - + if( cbl_bit == (cbl_bit & cbl_flags) ) { flags |= std_bit; } @@ -73,7 +73,7 @@ posix_opent(const char *pathname, int cbl_flags, int cbl_mode) { for( auto elem : mode_bits ) { int cbl_bit = elem.first; int std_bit = elem.second; - + if( cbl_bit == (cbl_bit & cbl_mode) ) { mode |= std_bit; } diff --git a/libgcobol/posix/udf/posix-ftruncate.cbl b/libgcobol/posix/udf/posix-ftruncate.cbl new file mode 100644 index 000000000000..75b235210d90 --- /dev/null +++ b/libgcobol/posix/udf/posix-ftruncate.cbl @@ -0,0 +1,23 @@ + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * This function is in the public domain. + * Contributed by smckinney of COBOLworx Feb 2026. + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * int ftruncate(int fd, off_t length); + Identification Division. + Function-ID. posix-ftruncate. + Data Division. + Linkage Section. + 77 Return-Value Binary-Long. + 01 Lk-fd PIC 9(8) Usage COMP. + 01 Lk-offset Binary-Long. + Procedure Division using + By Value Lk-fd, + By Value Lk-offset, + Returning Return-Value. + Display 'posix-ftruncate fd: ' Lk-fd ', Lk-offset: ' Lk-offset. + Call "ftruncate" using + By Value Lk-fd, + By Value Lk-offset, + Returning Return-Value. + Goback. + End Function posix-ftruncate. diff --git a/libgcobol/posix/udf/posix-open.cbl b/libgcobol/posix/udf/posix-open.cbl index efbeeee4b257..db46543410ea 100644 --- a/libgcobol/posix/udf/posix-open.cbl +++ b/libgcobol/posix/udf/posix-open.cbl @@ -11,7 +11,7 @@ With Debugging Mode >>END-IF . - + Data Division. Working-Storage Section. 77 Ws-pathname PIC X(8192). @@ -20,17 +20,17 @@ Linkage Section. 77 Return-Value Binary-Long. 01 Lk-pathname PIC X ANY LENGTH. - 01 Lk-flags PIC 9(8). + 01 Lk-flags PIC 9(8) Binary-long. 01 Lk-mode PIC 9(8). Procedure Division using By Reference Lk-pathname, By Reference Lk-flags, - By Reference Optional Lk-mode + By Reference Optional Lk-mode Returning Return-Value. Move Lk-pathname To Ws-pathname. - Inspect Ws-pathname + Inspect Ws-pathname Replacing Trailing Space By Low-Value D Display 'posix-open: Ws-pathname ', Ws-pathname. @@ -40,8 +40,10 @@ If ws-mode-ptr > 0 Then *> O_CREAT requires mode Move Lk-mode to Ws-mode. - - Call "posix_open" using Ws-pathname, Lk-flags, Ws-mode, + + Call "posix_open" using Ws-pathname, + By Value Lk-flags, + By Value Ws-mode, Returning Return-Value. Goback. diff --git a/libgcobol/posix/udf/posix-read.cbl b/libgcobol/posix/udf/posix-read.cbl index 2c057331da85..f0ea36b34267 100644 --- a/libgcobol/posix/udf/posix-read.cbl +++ b/libgcobol/posix/udf/posix-read.cbl @@ -1,14 +1,14 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * This function is in the public domain. * Contributed by James K. Lowden of COBOLworx November 2025. - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * long read( int fd, void * buf, unsigned long count) Identification Division. Function-ID. posix-read. Data Division. Linkage Section. 77 Return-Value Binary-Long. - 01 Lk-fd PIC 9(8) Usage COMP. + 01 Lk-fd PIC 9(8) Usage COMP-5. 01 Lk-buf PIC X ANY LENGTH. 01 Lk-count PIC 9(8) Usage COMP. Procedure Division using diff --git a/libgcobol/posix/udf/posix-write.cbl b/libgcobol/posix/udf/posix-write.cbl index e5513402c113..4dfd306a62ab 100644 --- a/libgcobol/posix/udf/posix-write.cbl +++ b/libgcobol/posix/udf/posix-write.cbl @@ -1,14 +1,14 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * This function is in the public domain. * Contributed by James K. Lowden of COBOLworx November 2025. - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * long write( int fd, const void * buf, unsigned long count) Identification Division. Function-ID. posix-write. Data Division. Linkage Section. 77 Return-Value Binary-Long. - 01 Lk-fd PIC 9(8) Usage COMP. + 01 Lk-fd PIC 9(8) Usage COMP-5. 01 Lk-buf PIC X ANY LENGTH. 01 Lk-count PIC 9(8) Usage COMP. Procedure Division using diff --git a/libgcobol/xmlparse.cc b/libgcobol/xmlparse.cc index 8ad189d102a0..b480cff6c8af 100644 --- a/libgcobol/xmlparse.cc +++ b/libgcobol/xmlparse.cc @@ -63,7 +63,7 @@ void sayso( const char func[], int line, if( getenv("XMLPARSE") ) { switch(len) { case 0: - fprintf(stderr, "%s:%d Kilroy was here\n", func, line); + fprintf(stderr, "%s:%d was here\n", func, line); break; case -1: fprintf(stderr, "%s:%d: '%s'\n", func, line, data);
