* tp/Texinfo/ParserNonXS.pm, tp/Texinfo/XS/parsetexi/Parsetexi.pm
(parser, simple_parser): Re-add simple_parser, used for document
translations.  The only difference from before is that we
do not share the indices between parsers.
* tp/Texinfo/Translations.pm (replace_convert_substrings):
Call simple_parser again.
This reverses the change on 2023-08-10. --- ChangeLog | 13 +++ tp/Texinfo/ParserNonXS.pm | 114 +++++++++++++++++++++------ tp/Texinfo/Translations.pm | 2 +- tp/Texinfo/XS/parsetexi/Parsetexi.pm | 4 + 4 files changed, 107 insertions(+), 26 deletions(-) diff --git a/ChangeLog b/ChangeLog index 318e84f2e5..b50d9056e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2023-11-25 Gavin Smith <[email protected]> + + Re-add simple_parser + + * tp/Texinfo/ParserNonXS.pm, tp/Texinfo/XS/parsetexi/Parsetexi.pm + (parser, simple_parser): Re-add simple_parser, used for document + translations. The only difference from before is that we + do not share the indices between parsers. + * tp/Texinfo/Translations.pm (replace_convert_substrings): + Call simple_parser again. + + This reverses the change on 2023-08-10. + 2023-11-24 Patrice Dumas <[email protected]> * tp/Texinfo/Common.pm (parse_node_manual), tp/Texinfo/Convert/HTML.pm diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm index 85b9a165bd..6a1a7ac2df 100644 --- a/tp/Texinfo/ParserNonXS.pm +++ b/tp/Texinfo/ParserNonXS.pm @@ -572,31 +572,8 @@ sub parser(;$$) my $parser = dclone(\%parser_default_configuration); bless $parser; - $parser->{'set'} = {}; - if (defined($conf)) { - foreach my $key (keys(%$conf)) { - if (exists($parser_settable_configuration{$key})) { - # we keep registrar instead of copying on purpose, to reuse the object - if ($key ne 'values' and $key ne 'registrar' and ref($conf->{$key})) { - $parser->{$key} = dclone($conf->{$key}); - } else { - $parser->{$key} = $conf->{$key}; - } - if ($initialization_overrides{$key}) { - $parser->{'set'}->{$key} = $parser->{$key}; - } - } else { - warn "ignoring parser configuration value \"$key\"\n"; - } - } - } - # restrict variables found by get_conf, and set the values to the - # parser initialization values only. What is found in the document - # has no effect. 
- foreach my $key (keys(%Texinfo::Common::default_parser_customization_values)) { - $parser->{'conf'}->{$key} = $parser->{$key}; - } - + _setup_conf($parser, $conf); + # This is not very useful in perl, but mimics the XS parser print STDERR "!!!!!!!!!!!!!!!! RESETTING THE PARSER !!!!!!!!!!!!!!!!!!!!!\n" if ($parser->{'DEBUG'}); @@ -611,6 +588,7 @@ sub parser(;$$) $parser->{'close_paragraph_commands'} = {%default_close_paragraph_commands}; $parser->{'close_preformatted_commands'} = {%close_preformatted_commands}; + # following is common with simple_parser # other initializations $parser->{'definfoenclose'} = {}; $parser->{'source_mark_counters'} = {}; @@ -637,6 +615,66 @@ sub parser(;$$) return $parser; } +# simple parser initialization. The only difference with a regular parser +# is that the dynamical @-commands groups and indices information references +# that are initialized in each regular parser are initialized once for all +# and shared among simple parsers. It is used in gdt() and this has a sizable +# effect on performance. +my $simple_parser_line_commands = dclone(\%line_commands); +my $simple_parser_brace_commands = dclone(\%brace_commands); +my $simple_parser_valid_nestings = dclone(\%default_valid_nestings); +my $simple_parser_no_paragraph_commands = {%default_no_paragraph_commands}; +my $simple_parser_index_names = dclone(\%index_names); +my $simple_parser_command_index = {%command_index}; +my $simple_parser_close_paragraph_commands = {%default_close_paragraph_commands}; +my $simple_parser_close_preformatted_commands = {%close_preformatted_commands}; +sub simple_parser(;$) +{ + my $conf = shift; + + my $parser = dclone(\%parser_default_configuration); + bless $parser; + + _setup_conf($parser, $conf); + # This is not very useful in perl, but mimics the XS parser + print STDERR "!!!!!!!!!!!!!!!! 
RESETTING THE PARSER !!!!!!!!!!!!!!!!!!!!!\n" + if ($parser->{'DEBUG'}); + + $parser->{'line_commands'} = $simple_parser_line_commands; + $parser->{'brace_commands'} = $simple_parser_brace_commands; + $parser->{'valid_nestings'} = $simple_parser_valid_nestings; + $parser->{'no_paragraph_commands'} = $simple_parser_no_paragraph_commands; + #$parser->{'index_names'} = $simple_parser_index_names; + $parser->{'index_names'} = dclone(\%index_names); + $parser->{'command_index'} = $simple_parser_command_index; + $parser->{'close_paragraph_commands'} = $simple_parser_close_paragraph_commands; + $parser->{'close_preformatted_commands'} = $simple_parser_close_preformatted_commands; + + # other initializations + $parser->{'definfoenclose'} = {}; + $parser->{'source_mark_counters'} = {}; + $parser->{'nesting_context'} = {%nesting_context_init}; + $parser->{'nesting_context'}->{'basic_inline_stack'} = []; + $parser->{'nesting_context'}->{'basic_inline_stack_on_line'} = []; + $parser->{'nesting_context'}->{'basic_inline_stack_block'} = []; + $parser->{'nesting_context'}->{'regions_stack'} = []; + $parser->{'basic_inline_commands'} = {%default_basic_inline_commands}; + + $parser->_init_context_stack(); + + # turn the array to a hash for speed. Not sure it really matters for such + # a small array. 
+ foreach my $expanded_format(@{$parser->{'EXPANDED_FORMATS'}}) { + $parser->{'expanded_formats_hash'}->{$expanded_format} = 1; + } + + if (not defined($parser->{'registrar'})) { + $parser->{'registrar'} = Texinfo::Report::new(); + } + + return $parser; +} + sub get_conf($$) { my ($self, $var) = @_; @@ -935,6 +973,32 @@ sub registered_errors($) sub _setup_conf($$) { + my ($parser, $conf) = @_; + + $parser->{'set'} = {}; + if (defined($conf)) { + foreach my $key (keys(%$conf)) { + if (exists($parser_settable_configuration{$key})) { + # we keep registrar instead of copying on purpose, to reuse the object + if ($key ne 'values' and $key ne 'registrar' and ref($conf->{$key})) { + $parser->{$key} = dclone($conf->{$key}); + } else { + $parser->{$key} = $conf->{$key}; + } + if ($initialization_overrides{$key}) { + $parser->{'set'}->{$key} = $parser->{$key}; + } + } else { + warn "ignoring parser configuration value \"$key\"\n"; + } + } + } + # restrict variables found by get_conf, and set the values to the + # parser initialization values only. What is found in the document + # has no effect. + foreach my $key (keys(%Texinfo::Common::default_parser_customization_values)) { + $parser->{'conf'}->{$key} = $parser->{$key}; + } } # Following are the internal parsing subroutines. 
The most important are diff --git a/tp/Texinfo/Translations.pm b/tp/Texinfo/Translations.pm index f9329499c0..c91689a82c 100644 --- a/tp/Texinfo/Translations.pm +++ b/tp/Texinfo/Translations.pm @@ -398,7 +398,7 @@ sub replace_convert_substrings($$;$) # } #} } - my $parser = Texinfo::Parser::parser($parser_conf); + my $parser = Texinfo::Parser::simple_parser($parser_conf); if ($customization_information->get_conf('DEBUG')) { print STDERR "IN TR PARSER '$texinfo_line'\n"; diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.pm b/tp/Texinfo/XS/parsetexi/Parsetexi.pm index 1ced996e7a..7e3d9fe23b 100644 --- a/tp/Texinfo/XS/parsetexi/Parsetexi.pm +++ b/tp/Texinfo/XS/parsetexi/Parsetexi.pm @@ -76,6 +76,10 @@ sub get_conf($$) return $self->{'conf'}->{$var}; } +sub simple_parser { + goto &parser; +} + # Initialize the parser sub parser (;$$) { -- 2.34.1
