On Mon, Jan 04, 2010 at 06:38:03PM -0500, Andrew Dunstan wrote:
> Andrew Dunstan wrote:
> >>
> >>Yes. I believe the test is highlighting an existing problem: that plperl
> >>function in non-PG_UTF8 databases can't use regular expressions that
> >>require unicode character meta-data.
> >>
> >>Either the (GetDatabaseEncoding() == PG_UTF8) test in plperl_safe_init()
> >>should be removed, so the utf8fix function is always called, or the
> >>test should be removed (or hacked to only apply to PG_UTF8 databases).
> >
> >I tried forcing the test, but it doesn't seem to work, possibly
> >because in the case that the db is not utf8 we aren't forcing
> >argument strings to UTF8 :-(
> >
> >I think we might need to remove the test from the patch.
> 
> I have not been able to come up with a fix for this - the whole
> thing seems very fragile. I'm going to commit what remains of this
> patch, but not add the extra regression test. I'll add a TODO to
> allow plperl to do utf8 operations in non-utf8 databases.

I see you've not commited it yet, so to help out I've attached
a new diff, over the current CVS head, with two minor changes:

- Removed the test, as noted above.
- Optimized pg_verifymbstr calls to avoid unneeded strlen()s.

This should apply cleanly to cvs, saving you the need to resolve the
conflicts caused by the recent pg_verifymbstr patch.
I'll add it to the commitfest once it reaches the archives.

Tim.

diff --git a/doc/src/sgml/plperl.sgml b/doc/src/sgml/plperl.sgml
index 7eebfba..37114bd 100644
*** a/doc/src/sgml/plperl.sgml
--- b/doc/src/sgml/plperl.sgml
***************
*** 14,20 ****
    <para>
     PL/Perl is a loadable procedural language that enables you to write
     <productname>PostgreSQL</productname> functions in the 
!    <ulink url="http://www.perl.com";>Perl programming language</ulink>.
    </para>
  
    <para>
--- 14,20 ----
    <para>
     PL/Perl is a loadable procedural language that enables you to write
     <productname>PostgreSQL</productname> functions in the 
!    <ulink url="http://www.perl.org";>Perl programming language</ulink>.
    </para>
  
    <para>
*************** SELECT * FROM perl_set();
*** 313,319 ****
  use strict;
  </programlisting>
     in the function body.  But this only works in <application>PL/PerlU</>
!    functions, since <literal>use</> is not a trusted operation.  In
     <application>PL/Perl</> functions you can instead do:
  <programlisting>
  BEGIN { strict->import(); }
--- 313,320 ----
  use strict;
  </programlisting>
     in the function body.  But this only works in <application>PL/PerlU</>
!    functions, since the <literal>use</> triggers a <literal>require</>
!    which is not a trusted operation.  In
     <application>PL/Perl</> functions you can instead do:
  <programlisting>
  BEGIN { strict->import(); }
diff --git a/src/pl/plperl/GNUmakefile b/src/pl/plperl/GNUmakefile
index a3c3495..8989b14 100644
*** a/src/pl/plperl/GNUmakefile
--- b/src/pl/plperl/GNUmakefile
*************** PSQLDIR = $(bindir)
*** 45,50 ****
--- 45,55 ----
  
  include $(top_srcdir)/src/Makefile.shlib
  
+ plperl.o: perlchunks.h
+ 
+ perlchunks.h: plc_*.pl
+ 	$(PERL) text2macro.pl --strip='^(\#.*|\s*)$$' plc_*.pl > perlchunks.htmp
+ 	mv perlchunks.htmp perlchunks.h
  
  all: all-lib
  
*************** submake:
*** 65,71 ****
  	$(MAKE) -C $(top_builddir)/src/test/regress pg_regress$(X)
  
  clean distclean maintainer-clean: clean-lib
! 	rm -f SPI.c $(OBJS)
  	rm -rf results
  	rm -f regression.diffs regression.out
  
--- 70,76 ----
  	$(MAKE) -C $(top_builddir)/src/test/regress pg_regress$(X)
  
  clean distclean maintainer-clean: clean-lib
! 	rm -f SPI.c $(OBJS) perlchunks.htmp perlchunks.h
  	rm -rf results
  	rm -f regression.diffs regression.out
  
diff --git a/src/pl/plperl/plc_perlboot.pl b/src/pl/plperl/plc_perlboot.pl
index ...d2d5518 .
*** a/src/pl/plperl/plc_perlboot.pl
--- b/src/pl/plperl/plc_perlboot.pl
***************
*** 0 ****
--- 1,50 ----
+ SPI::bootstrap();
+ use vars qw(%_SHARED);
+ 
+ sub ::plperl_warn {
+ 	(my $msg = shift) =~ s/\(eval \d+\) //g;
+ 	&elog(&NOTICE, $msg);
+ }
+ $SIG{__WARN__} = \&::plperl_warn;
+ 
+ sub ::plperl_die {
+ 	(my $msg = shift) =~ s/\(eval \d+\) //g;
+     die $msg;
+ }
+ $SIG{__DIE__} = \&::plperl_die;
+ 
+ sub ::mkunsafefunc {
+ 	my $ret = eval(qq[ sub { $_[0] $_[1] } ]);
+ 	$@ =~ s/\(eval \d+\) //g if $@;
+ 	return $ret;
+ }
+ 
+ use strict;
+ 
+ sub ::mk_strict_unsafefunc {
+ 	my $ret = eval(qq[ sub { use strict; $_[0] $_[1] } ]);
+ 	$@ =~ s/\(eval \d+\) //g if $@;
+ 	return $ret;
+ }
+ 
+ sub ::_plperl_to_pg_array {
+   my $arg = shift;
+   ref $arg eq 'ARRAY' || return $arg;
+   my $res = '';
+   my $first = 1;
+   foreach my $elem (@$arg) {
+     $res .= ', ' unless $first; $first = undef;
+     if (ref $elem) {
+       $res .= _plperl_to_pg_array($elem);
+     }
+     elsif (defined($elem)) {
+       my $str = qq($elem);
+       $str =~ s/([\"\\])/\\$1/g;
+       $res .= qq(\"$str\");
+     }
+     else {
+       $res .= 'NULL' ;
+     }
+   }
+   return qq({$res});
+ }
diff --git a/src/pl/plperl/plc_safe_bad.pl b/src/pl/plperl/plc_safe_bad.pl
index ...838ccc6 .
*** a/src/pl/plperl/plc_safe_bad.pl
--- b/src/pl/plperl/plc_safe_bad.pl
***************
*** 0 ****
--- 1,15 ----
+ use vars qw($PLContainer);
+ 
+ $PLContainer = new Safe('PLPerl');
+ $PLContainer->permit_only(':default');
+ $PLContainer->share(qw[&elog &ERROR]);
+ 
+ my $msg = 'trusted Perl functions disabled - please upgrade Perl Safe module to version 2.09 or later';
+ sub ::mksafefunc {
+   return $PLContainer->reval(qq[sub { elog(ERROR,'$msg') }]);
+ }
+ 
+ sub ::mk_strict_safefunc {
+   return $PLContainer->reval(qq[sub { elog(ERROR,'$msg') }]);
+ }
+ 
diff --git a/src/pl/plperl/plc_safe_ok.pl b/src/pl/plperl/plc_safe_ok.pl
index ...73c5573 .
*** a/src/pl/plperl/plc_safe_ok.pl
--- b/src/pl/plperl/plc_safe_ok.pl
***************
*** 0 ****
--- 1,33 ----
+ use vars qw($PLContainer);
+ 
+ $PLContainer = new Safe('PLPerl');
+ $PLContainer->permit_only(':default');
+ $PLContainer->permit(qw[:base_math !:base_io sort time]);
+ 
+ $PLContainer->share(qw[&elog &return_next
+ 	&spi_query &spi_fetchrow &spi_cursor_close &spi_exec_query
+ 	&spi_prepare &spi_exec_prepared &spi_query_prepared &spi_freeplan
+ 	&_plperl_to_pg_array
+ 	&DEBUG &LOG &INFO &NOTICE &WARNING &ERROR %_SHARED
+ ]);
+ 
+ # Load strict into the container.
+ # The temporary enabling of the caller opcode here is to work around a
+ # bug in perl 5.10, which unkindly changed the way its Safe.pm works, without
+ # notice. It is quite safe, as caller is informational only, and in any case
+ # we only enable it while we load the 'strict' module.
+ $PLContainer->permit(qw[require caller]);
+ $PLContainer->reval('use strict;');
+ $PLContainer->deny(qw[require caller]);
+ 
+ sub ::mksafefunc {
+ 	my $ret = $PLContainer->reval(qq[sub { $_[0] $_[1] }]);
+ 	$@ =~ s/\(eval \d+\) //g if $@;
+ 	return $ret;
+ }
+ 
+ sub ::mk_strict_safefunc {
+ 	my $ret = $PLContainer->reval(qq[sub { BEGIN { strict->import(); } $_[0] $_[1] }]);
+ 	$@ =~ s/\(eval \d+\) //g if $@;
+ 	return $ret;
+ }
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index 08af5e8..5cd7dd5 100644
*** a/src/pl/plperl/plperl.c
--- b/src/pl/plperl/plperl.c
***************
*** 43,48 ****
--- 43,51 ----
  /* perl stuff */
  #include "plperl.h"
  
+ /* string literal macros defining chunks of perl code */
+ #include "perlchunks.h"
+ 
  PG_MODULE_MAGIC;
  
  /**********************************************************************
*************** typedef enum
*** 125,133 ****
  } InterpState;
  
  static InterpState interp_state = INTERP_NONE;
- static bool can_run_two = false;
  
- static bool plperl_safe_init_done = false;
  static PerlInterpreter *plperl_trusted_interp = NULL;
  static PerlInterpreter *plperl_untrusted_interp = NULL;
  static PerlInterpreter *plperl_held_interp = NULL;
--- 128,134 ----
*************** Datum		plperl_inline_handler(PG_FUNCTION
*** 148,154 ****
  Datum		plperl_validator(PG_FUNCTION_ARGS);
  void		_PG_init(void);
  
! static void plperl_init_interp(void);
  
  static Datum plperl_func_handler(PG_FUNCTION_ARGS);
  static Datum plperl_trigger_handler(PG_FUNCTION_ARGS);
--- 149,155 ----
  Datum		plperl_validator(PG_FUNCTION_ARGS);
  void		_PG_init(void);
  
! static PerlInterpreter *plperl_init_interp(void);
  
  static Datum plperl_func_handler(PG_FUNCTION_ARGS);
  static Datum plperl_trigger_handler(PG_FUNCTION_ARGS);
*************** static plperl_proc_desc *compile_plperl_
*** 157,171 ****
  
  static SV  *plperl_hash_from_tuple(HeapTuple tuple, TupleDesc tupdesc);
  static void plperl_init_shared_libs(pTHX);
  static HV  *plperl_spi_execute_fetch_result(SPITupleTable *, int, int);
  static SV  *newSVstring(const char *str);
  static SV **hv_store_string(HV *hv, const char *key, SV *val);
  static SV **hv_fetch_string(HV *hv, const char *key);
! static SV  *plperl_create_sub(const char *proname, const char *s, bool trusted);
  static SV  *plperl_call_perl_func(plperl_proc_desc *desc, FunctionCallInfo fcinfo);
  static void plperl_compile_callback(void *arg);
  static void plperl_exec_callback(void *arg);
  static void plperl_inline_callback(void *arg);
  
  /*
   * This routine is a crock, and so is everyplace that calls it.  The problem
--- 158,174 ----
  
  static SV  *plperl_hash_from_tuple(HeapTuple tuple, TupleDesc tupdesc);
  static void plperl_init_shared_libs(pTHX);
+ static void plperl_safe_init(void);
  static HV  *plperl_spi_execute_fetch_result(SPITupleTable *, int, int);
  static SV  *newSVstring(const char *str);
  static SV **hv_store_string(HV *hv, const char *key, SV *val);
  static SV **hv_fetch_string(HV *hv, const char *key);
! static void plperl_create_sub(plperl_proc_desc *desc, char *s);
  static SV  *plperl_call_perl_func(plperl_proc_desc *desc, FunctionCallInfo fcinfo);
  static void plperl_compile_callback(void *arg);
  static void plperl_exec_callback(void *arg);
  static void plperl_inline_callback(void *arg);
+ static char *sv2text_mbverified(const SV *sv);
  
  /*
   * This routine is a crock, and so is everyplace that calls it.  The problem
*************** _PG_init(void)
*** 228,325 ****
  									&hash_ctl,
  									HASH_ELEM);
  
! 	plperl_init_interp();
  
  	inited = true;
  }
  
- /* Each of these macros must represent a single string literal */
- 
- #define PERLBOOT \
- 	"SPI::bootstrap(); use vars qw(%_SHARED);" \
- 	"sub ::plperl_warn { my $msg = shift; " \
- 	"       $msg =~ s/\\(eval \\d+\\) //g; &elog(&NOTICE, $msg); } " \
- 	"$SIG{__WARN__} = \\&::plperl_warn; " \
- 	"sub ::plperl_die { my $msg = shift; " \
- 	"       $msg =~ s/\\(eval \\d+\\) //g; die $msg; } " \
- 	"$SIG{__DIE__} = \\&::plperl_die; " \
- 	"sub ::mkunsafefunc {" \
- 	"      my $ret = eval(qq[ sub { $_[0] $_[1] } ]); " \
- 	"      $@ =~ s/\\(eval \\d+\\) //g if $@; return $ret; }" \
- 	"use strict; " \
- 	"sub ::mk_strict_unsafefunc {" \
- 	"      my $ret = eval(qq[ sub { use strict; $_[0] $_[1] } ]); " \
- 	"      $@ =~ s/\\(eval \\d+\\) //g if $@; return $ret; } " \
- 	"sub ::_plperl_to_pg_array {" \
- 	"  my $arg = shift; ref $arg eq 'ARRAY' || return $arg; " \
- 	"  my $res = ''; my $first = 1; " \
- 	"  foreach my $elem (@$arg) " \
- 	"  { " \
- 	"    $res .= ', ' unless $first; $first = undef; " \
- 	"    if (ref $elem) " \
- 	"    { " \
- 	"      $res .= _plperl_to_pg_array($elem); " \
- 	"    } " \
- 	"    elsif (defined($elem)) " \
- 	"    { " \
- 	"      my $str = qq($elem); " \
- 	"      $str =~ s/([\"\\\\])/\\\\$1/g; " \
- 	"      $res .= qq(\"$str\"); " \
- 	"    } " \
- 	"    else " \
- 	"    { "\
- 	"      $res .= 'NULL' ; " \
- 	"    } "\
- 	"  } " \
- 	"  return qq({$res}); " \
- 	"} "
- 
  #define SAFE_MODULE \
  	"require Safe; $Safe::VERSION"
  
- /*
-  * The temporary enabling of the caller opcode here is to work around a
-  * bug in perl 5.10, which unkindly changed the way its Safe.pm works, without
-  * notice. It is quite safe, as caller is informational only, and in any case
-  * we only enable it while we load the 'strict' module.
-  */
- 
- #define SAFE_OK \
- 	"use vars qw($PLContainer); $PLContainer = new Safe('PLPerl');" \
- 	"$PLContainer->permit_only(':default');" \
- 	"$PLContainer->permit(qw[:base_math !:base_io sort time]);" \
- 	"$PLContainer->share(qw[&elog &spi_exec_query &return_next " \
- 	"&spi_query &spi_fetchrow &spi_cursor_close " \
- 	"&spi_prepare &spi_exec_prepared &spi_query_prepared &spi_freeplan " \
- 	"&_plperl_to_pg_array " \
- 	"&DEBUG &LOG &INFO &NOTICE &WARNING &ERROR %_SHARED ]);" \
- 	"sub ::mksafefunc {" \
- 	"      my $ret = $PLContainer->reval(qq[sub { $_[0] $_[1] }]); " \
- 	"      $@ =~ s/\\(eval \\d+\\) //g if $@; return $ret; }" \
- 	"$PLContainer->permit(qw[require caller]); $PLContainer->reval('use strict;');" \
- 	"$PLContainer->deny(qw[require caller]); " \
- 	"sub ::mk_strict_safefunc {" \
- 	"      my $ret = $PLContainer->reval(qq[sub { BEGIN { strict->import(); } $_[0] $_[1] }]); " \
- 	"      $@ =~ s/\\(eval \\d+\\) //g if $@; return $ret; }"
- 
- #define SAFE_BAD \
- 	"use vars qw($PLContainer); $PLContainer = new Safe('PLPerl');" \
- 	"$PLContainer->permit_only(':default');" \
- 	"$PLContainer->share(qw[&elog &ERROR ]);" \
- 	"sub ::mksafefunc { return $PLContainer->reval(qq[sub { " \
- 	"      elog(ERROR,'trusted Perl functions disabled - " \
- 	"      please upgrade Perl Safe module to version 2.09 or later');}]); }" \
- 	"sub ::mk_strict_safefunc { return $PLContainer->reval(qq[sub { " \
- 	"      elog(ERROR,'trusted Perl functions disabled - " \
- 	"      please upgrade Perl Safe module to version 2.09 or later');}]); }"
- 
- #define TEST_FOR_MULTI \
- 	"use Config; " \
- 	"$Config{usemultiplicity} eq 'define' or "	\
- 	"($Config{usethreads} eq 'define' " \
- 	" and $Config{useithreads} eq 'define')"
- 
- 
  /********************************************************************
   *
   * We start out by creating a "held" interpreter that we can use in
--- 231,245 ----
  									&hash_ctl,
  									HASH_ELEM);
  
! 	plperl_held_interp = plperl_init_interp();
! 	interp_state = INTERP_HELD;
  
  	inited = true;
  }
  
  #define SAFE_MODULE \
  	"require Safe; $Safe::VERSION"
  
  /********************************************************************
   *
   * We start out by creating a "held" interpreter that we can use in
*************** check_interp(bool trusted)
*** 349,354 ****
--- 269,276 ----
  		}
  		plperl_held_interp = NULL;
  		trusted_context = trusted;
+ 		if (trusted) /* done last to avoid recursion */
+ 			plperl_safe_init();
  	}
  	else if (interp_state == INTERP_BOTH ||
  			 (trusted && interp_state == INTERP_TRUSTED) ||
*************** check_interp(bool trusted)
*** 363,384 ****
  			trusted_context = trusted;
  		}
  	}
! 	else if (can_run_two)
  	{
! 		PERL_SET_CONTEXT(plperl_held_interp);
! 		plperl_init_interp();
  		if (trusted)
! 			plperl_trusted_interp = plperl_held_interp;
  		else
! 			plperl_untrusted_interp = plperl_held_interp;
! 		interp_state = INTERP_BOTH;
  		plperl_held_interp = NULL;
  		trusted_context = trusted;
! 	}
! 	else
! 	{
  		elog(ERROR,
  			 "cannot allocate second Perl interpreter on this platform");
  	}
  }
  
--- 285,307 ----
  			trusted_context = trusted;
  		}
  	}
! 	else
  	{
! #ifdef MULTIPLICITY
! 		PerlInterpreter *plperl = plperl_init_interp();
  		if (trusted)
! 			plperl_trusted_interp = plperl;
  		else
! 			plperl_untrusted_interp = plperl;
  		plperl_held_interp = NULL;
  		trusted_context = trusted;
! 		interp_state = INTERP_BOTH;
! 		if (trusted) /* done last to avoid recursion */
! 			plperl_safe_init();
! #else
  		elog(ERROR,
  			 "cannot allocate second Perl interpreter on this platform");
+ #endif
  	}
  }
  
*************** restore_context(bool old_context)
*** 398,408 ****
  	}
  }
  
! static void
  plperl_init_interp(void)
  {
  	static char *embedding[3] = {
! 		"", "-e", PERLBOOT
  	};
  	int			nargs = 3;
  
--- 321,334 ----
  	}
  }
  
! static PerlInterpreter *
  plperl_init_interp(void)
  {
+ 	PerlInterpreter *plperl;
+ 	static int perl_sys_init_done;
+ 
  	static char *embedding[3] = {
! 		"", "-e", PLC_PERLBOOT
  	};
  	int			nargs = 3;
  
*************** plperl_init_interp(void)
*** 459,489 ****
  	 */
  #if defined(PERL_SYS_INIT3) && !defined(MYMALLOC)
  	/* only call this the first time through, as per perlembed man page */
! 	if (interp_state == INTERP_NONE)
  	{
  		char	   *dummy_env[1] = {NULL};
  
  		PERL_SYS_INIT3(&nargs, (char ***) &embedding, (char ***) &dummy_env);
  	}
  #endif
  
! 	plperl_held_interp = perl_alloc();
! 	if (!plperl_held_interp)
  		elog(ERROR, "could not allocate Perl interpreter");
  
! 	perl_construct(plperl_held_interp);
! 	perl_parse(plperl_held_interp, plperl_init_shared_libs,
  			   nargs, embedding, NULL);
! 	perl_run(plperl_held_interp);
! 
! 	if (interp_state == INTERP_NONE)
! 	{
! 		SV		   *res;
! 
! 		res = eval_pv(TEST_FOR_MULTI, TRUE);
! 		can_run_two = SvIV(res);
! 		interp_state = INTERP_HELD;
! 	}
  
  #ifdef WIN32
  
--- 385,408 ----
  	 */
  #if defined(PERL_SYS_INIT3) && !defined(MYMALLOC)
  	/* only call this the first time through, as per perlembed man page */
! 	if (!perl_sys_init_done)
  	{
  		char	   *dummy_env[1] = {NULL};
  
  		PERL_SYS_INIT3(&nargs, (char ***) &embedding, (char ***) &dummy_env);
+ 		perl_sys_init_done = 1;
  	}
  #endif
  
! 	plperl = perl_alloc();
! 	if (!plperl)
  		elog(ERROR, "could not allocate Perl interpreter");
  
! 	PERL_SET_CONTEXT(plperl);
! 	perl_construct(plperl);
! 	perl_parse(plperl, plperl_init_shared_libs,
  			   nargs, embedding, NULL);
! 	perl_run(plperl);
  
  #ifdef WIN32
  
*************** plperl_init_interp(void)
*** 526,557 ****
  	}
  #endif
  
  }
  
  
  static void
  plperl_safe_init(void)
  {
! 	SV		   *res;
! 	double		safe_version;
! 
! 	res = eval_pv(SAFE_MODULE, FALSE);	/* TRUE = croak if failure */
  
! 	safe_version = SvNV(res);
  
  	/*
! 	 * We actually want to reject safe_version < 2.09, but it's risky to
  	 * assume that floating-point comparisons are exact, so use a slightly
  	 * smaller comparison value.
  	 */
! 	if (safe_version < 2.0899)
  	{
  		/* not safe, so disallow all trusted funcs */
! 		eval_pv(SAFE_BAD, FALSE);
  	}
  	else
  	{
! 		eval_pv(SAFE_OK, FALSE);
  		if (GetDatabaseEncoding() == PG_UTF8)
  		{
  			/*
--- 445,474 ----
  	}
  #endif
  
+ 	return plperl;
  }
  
  
  static void
  plperl_safe_init(void)
  {
! 	SV		   *safe_version_sv;
  
! 	safe_version_sv = eval_pv(SAFE_MODULE, FALSE);	/* TRUE = croak if failure */
  
  	/*
! 	 * We actually want to reject Safe version < 2.09, but it's risky to
  	 * assume that floating-point comparisons are exact, so use a slightly
  	 * smaller comparison value.
  	 */
! 	if (SvNV(safe_version_sv) < 2.0899)
  	{
  		/* not safe, so disallow all trusted funcs */
! 		eval_pv(PLC_SAFE_BAD, FALSE);
  	}
  	else
  	{
! 		eval_pv(PLC_SAFE_OK, FALSE);
  		if (GetDatabaseEncoding() == PG_UTF8)
  		{
  			/*
*************** plperl_safe_init(void)
*** 559,593 ****
  			 * the safe container and call it. For some reason not entirely
  			 * clear, it prevents errors that can arise from the regex code
  			 * later trying to load utf8 modules.
  			 */
  			plperl_proc_desc desc;
  			FunctionCallInfoData fcinfo;
- 			SV		   *ret;
- 			SV		   *func;
- 
- 			/* make sure we don't call ourselves recursively */
- 			plperl_safe_init_done = true;
- 
- 			/* compile the function */
- 			func = plperl_create_sub("utf8fix",
- 							 "return shift =~ /\\xa9/i ? 'true' : 'false' ;",
- 									 true);
  
! 			/* set up to call the function with a single text argument 'a' */
! 			desc.reference = func;
  			desc.nargs = 1;
  			desc.arg_is_rowtype[0] = false;
  			fmgr_info(F_TEXTOUT, &(desc.arg_out_func[0]));
  
  			fcinfo.arg[0] = CStringGetTextDatum("a");
  			fcinfo.argnull[0] = false;
  
  			/* and make the call */
! 			ret = plperl_call_perl_func(&desc, &fcinfo);
  		}
  	}
- 
- 	plperl_safe_init_done = true;
  }
  
  /*
--- 476,504 ----
  			 * the safe container and call it. For some reason not entirely
  			 * clear, it prevents errors that can arise from the regex code
  			 * later trying to load utf8 modules.
+ 			 * See http://rt.perl.org/rt3/Ticket/Display.html?id=47576
  			 */
  			plperl_proc_desc desc;
  			FunctionCallInfoData fcinfo;
  
! 			desc.proname = "utf8fix";
! 			desc.lanpltrusted = true;
  			desc.nargs = 1;
  			desc.arg_is_rowtype[0] = false;
  			fmgr_info(F_TEXTOUT, &(desc.arg_out_func[0]));
  
+ 			/* compile the function */
+ 			plperl_create_sub(&desc,
+ 					"return shift =~ /\\xa9/i ? 'true' : 'false' ;");
+ 
+ 			/* set up to call the function with a single text argument 'a' */
  			fcinfo.arg[0] = CStringGetTextDatum("a");
  			fcinfo.argnull[0] = false;
  
  			/* and make the call */
! 			(void) plperl_call_perl_func(&desc, &fcinfo);
  		}
  	}
  }
  
  /*
*************** plperl_build_tuple_result(HV *perlhash, 
*** 631,641 ****
  							key)));
  		if (SvOK(val))
  		{
! 			char * aval;
! 
! 			aval = SvPV_nolen(val);
! 			pg_verifymbstr(aval, strlen(aval), false);
! 			values[attn - 1] = aval;
  		}
  	}
  	hv_iterinit(perlhash);
--- 542,548 ----
  							key)));
  		if (SvOK(val))
  		{
! 			values[attn - 1] = sv2text_mbverified(val);
  		}
  	}
  	hv_iterinit(perlhash);
*************** plperl_modify_tuple(HV *hvTD, TriggerDat
*** 835,846 ****
  		atttypmod = tupdesc->attrs[attn - 1]->atttypmod;
  		if (SvOK(val))
  		{
- 			char * aval;
- 
- 			aval = SvPV_nolen(val);
- 			pg_verifymbstr(aval,strlen(aval), false);
  			modvalues[slotsused] = InputFunctionCall(&finfo,
! 													 aval,
  													 typioparam,
  													 atttypmod);
  			modnulls[slotsused] = ' ';
--- 742,749 ----
  		atttypmod = tupdesc->attrs[attn - 1]->atttypmod;
  		if (SvOK(val))
  		{
  			modvalues[slotsused] = InputFunctionCall(&finfo,
! 													 sv2text_mbverified(val),
  													 typioparam,
  													 atttypmod);
  			modnulls[slotsused] = ' ';
*************** plperl_inline_handler(PG_FUNCTION_ARGS)
*** 970,978 ****
  
  		check_interp(desc.lanpltrusted);
  
! 		desc.reference = plperl_create_sub(desc.proname,
! 										   codeblock->source_text,
! 										   desc.lanpltrusted);
  
  		if (!desc.reference)	/* can this happen? */
  			elog(ERROR, "could not create internal procedure for anonymous code block");
--- 873,879 ----
  
  		check_interp(desc.lanpltrusted);
  
! 		plperl_create_sub(&desc, codeblock->source_text);
  
  		if (!desc.reference)	/* can this happen? */
  			elog(ERROR, "could not create internal procedure for anonymous code block");
*************** plperl_validator(PG_FUNCTION_ARGS)
*** 1080,1099 ****
   * Uses mksafefunc/mkunsafefunc to create an anonymous sub whose text is
   * supplied in s, and returns a reference to the closure.
   */
! static SV  *
! plperl_create_sub(const char *proname, const char *s, bool trusted)
  {
  	dSP;
  	SV		   *subref;
  	int			count;
  	char	   *compile_sub;
  
- 	if (trusted && !plperl_safe_init_done)
- 	{
- 		plperl_safe_init();
- 		SPAGAIN;
- 	}
- 
  	ENTER;
  	SAVETMPS;
  	PUSHMARK(SP);
--- 981,995 ----
   * Uses mksafefunc/mkunsafefunc to create an anonymous sub whose text is
   * supplied in s, and returns a reference to the closure.
   */
! static void
! plperl_create_sub(plperl_proc_desc *prodesc, char *s)
  {
  	dSP;
+ 	bool        trusted = prodesc->lanpltrusted;
  	SV		   *subref;
  	int			count;
  	char	   *compile_sub;
  
  	ENTER;
  	SAVETMPS;
  	PUSHMARK(SP);
*************** plperl_create_sub(const char *proname, c
*** 1127,1135 ****
  		elog(ERROR, "didn't get a return item from mksafefunc");
  	}
  
  	if (SvTRUE(ERRSV))
  	{
- 		(void) POPs;
  		PUTBACK;
  		FREETMPS;
  		LEAVE;
--- 1023,1032 ----
  		elog(ERROR, "didn't get a return item from mksafefunc");
  	}
  
+ 	subref = POPs;
+ 
  	if (SvTRUE(ERRSV))
  	{
  		PUTBACK;
  		FREETMPS;
  		LEAVE;
*************** plperl_create_sub(const char *proname, c
*** 1138,1167 ****
  				 errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV)))));
  	}
  
- 	/*
- 	 * need to make a deep copy of the return. it comes off the stack as a
- 	 * temporary.
- 	 */
- 	subref = newSVsv(POPs);
- 
  	if (!SvROK(subref) || SvTYPE(SvRV(subref)) != SVt_PVCV)
  	{
  		PUTBACK;
  		FREETMPS;
  		LEAVE;
- 
- 		/*
- 		 * subref is our responsibility because it is not mortal
- 		 */
- 		SvREFCNT_dec(subref);
  		elog(ERROR, "didn't get a code ref");
  	}
  
  	PUTBACK;
  	FREETMPS;
  	LEAVE;
  
! 	return subref;
  }
  
  
--- 1035,1059 ----
  				 errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV)))));
  	}
  
  	if (!SvROK(subref) || SvTYPE(SvRV(subref)) != SVt_PVCV)
  	{
  		PUTBACK;
  		FREETMPS;
  		LEAVE;
  		elog(ERROR, "didn't get a code ref");
  	}
  
+ 	/*
+ 	 * need to make a copy of the return, it comes off the stack as a
+ 	 * temporary.
+ 	 */
+ 	prodesc->reference = newSVsv(subref);
+ 
  	PUTBACK;
  	FREETMPS;
  	LEAVE;
  
! 	return;
  }
  
  
*************** plperl_func_handler(PG_FUNCTION_ARGS)
*** 1467,1473 ****
  	else
  	{
  		/* Return a perl string converted to a Datum */
- 		char	   *val;
  
  		if (prodesc->fn_retisarray && SvROK(perlret) &&
  			SvTYPE(SvRV(perlret)) == SVt_PVAV)
--- 1359,1364 ----
*************** plperl_func_handler(PG_FUNCTION_ARGS)
*** 1477,1485 ****
  			perlret = array_ret;
  		}
  
! 		val = SvPV_nolen(perlret);
! 		pg_verifymbstr(val, strlen(val), false);
! 		retval = InputFunctionCall(&prodesc->result_in_func, val,
  								   prodesc->result_typioparam, -1);
  	}
  
--- 1368,1375 ----
  			perlret = array_ret;
  		}
  
! 		retval = InputFunctionCall(&prodesc->result_in_func,
! 								   sv2text_mbverified(perlret),
  								   prodesc->result_typioparam, -1);
  	}
  
*************** compile_plperl_function(Oid fn_oid, bool
*** 1843,1851 ****
  
  		check_interp(prodesc->lanpltrusted);
  
! 		prodesc->reference = plperl_create_sub(prodesc->proname,
! 											   proc_source,
! 											   prodesc->lanpltrusted);
  
  		restore_context(oldcontext);
  
--- 1733,1739 ----
  
  		check_interp(prodesc->lanpltrusted);
  
! 		plperl_create_sub(prodesc, proc_source);
  
  		restore_context(oldcontext);
  
*************** plperl_return_next(SV *sv)
*** 2126,2142 ****
  
  		if (SvOK(sv))
  		{
- 			char	   *val;
- 
  			if (prodesc->fn_retisarray && SvROK(sv) &&
  				SvTYPE(SvRV(sv)) == SVt_PVAV)
  			{
  				sv = plperl_convert_to_pg_array(sv);
  			}
  
! 			val = SvPV_nolen(sv);
! 			pg_verifymbstr(val, strlen(val), false);
! 			ret = InputFunctionCall(&prodesc->result_in_func, val,
  									prodesc->result_typioparam, -1);
  			isNull = false;
  		}
--- 2014,2027 ----
  
  		if (SvOK(sv))
  		{
  			if (prodesc->fn_retisarray && SvROK(sv) &&
  				SvTYPE(SvRV(sv)) == SVt_PVAV)
  			{
  				sv = plperl_convert_to_pg_array(sv);
  			}
  
! 			ret = InputFunctionCall(&prodesc->result_in_func,
! 									sv2text_mbverified(sv),
  									prodesc->result_typioparam, -1);
  			isNull = false;
  		}
*************** plperl_spi_exec_prepared(char *query, HV
*** 2526,2537 ****
  		{
  			if (SvOK(argv[i]))
  			{
- 				char *val;
- 
- 				val = SvPV_nolen(argv[i]);
- 				pg_verifymbstr(val, strlen(val), false);
  				argvalues[i] = InputFunctionCall(&qdesc->arginfuncs[i],
! 												 val,
  												 qdesc->argtypioparams[i],
  												 -1);
  				nulls[i] = ' ';
--- 2411,2418 ----
  		{
  			if (SvOK(argv[i]))
  			{
  				argvalues[i] = InputFunctionCall(&qdesc->arginfuncs[i],
! 												 sv2text_mbverified(argv[i]),
  												 qdesc->argtypioparams[i],
  												 -1);
  				nulls[i] = ' ';
*************** plperl_spi_query_prepared(char *query, i
*** 2661,2672 ****
  		{
  			if (SvOK(argv[i]))
  			{
- 				char *val;
- 				
- 				val = SvPV_nolen(argv[i]);
- 				pg_verifymbstr(val, strlen(val), false);
  				argvalues[i] = InputFunctionCall(&qdesc->arginfuncs[i],
! 												 val,
  												 qdesc->argtypioparams[i],
  												 -1);
  				nulls[i] = ' ';
--- 2542,2549 ----
  		{
  			if (SvOK(argv[i]))
  			{
  				argvalues[i] = InputFunctionCall(&qdesc->arginfuncs[i],
! 												 sv2text_mbverified(argv[i]),
  												 qdesc->argtypioparams[i],
  												 -1);
  				nulls[i] = ' ';
*************** plperl_spi_freeplan(char *query)
*** 2774,2779 ****
--- 2651,2670 ----
  }
  
  /*
+  * Convert an SV to char * and verify the encoding via pg_verifymbstr()
+  */
+ static char *
+ sv2text_mbverified(const SV *sv)
+ {
+ 	char * val;
+ 	STRLEN len;
+ 
+ 	val = SvPV(sv, len);
+ 	pg_verifymbstr(val, len, false);
+     return val;
+ }
+ 
+ /*
   * Create a new SV from a string assumed to be in the current database's
   * encoding.
   */
diff --git a/src/pl/plperl/sql/plperl.sql b/src/pl/plperl/sql/plperl.sql
index f12e2f7..08e5371 100644
*** a/src/pl/plperl/sql/plperl.sql
--- b/src/pl/plperl/sql/plperl.sql
*************** $$ LANGUAGE plperl;
*** 369,371 ****
--- 369,372 ----
  
  -- check that restricted operations are rejected in a plperl DO block
  DO $$ use Config; $$ LANGUAGE plperl;
+ 
diff --git a/src/pl/plperl/text2macro.pl b/src/pl/plperl/text2macro.pl
index ...1628e86 .
*** a/src/pl/plperl/text2macro.pl
--- b/src/pl/plperl/text2macro.pl
***************
*** 0 ****
--- 1,98 ----
+ =head1 NAME
+ 
+ text2macro.pl - convert text files into C string-literal macro definitions
+ 
+ =head1 SYNOPSIS
+ 
+   text2macro [options] file ... > output.h
+ 
+ Options:
+ 
+   --prefix=S   - add prefix S to the names of the macros
+   --name=S     - use S as the macro name (assumes only one file)
+   --strip=S    - don't include lines that match perl regex S
+ 
+ =head1 DESCRIPTION
+ 
+ Reads one or more text files and outputs a corresponding series of C
+ pre-processor macro definitions. Each macro defines a string literal that
+ contains the contents of the corresponding text file. The basename of the text
+ file as capitalized and used as the name of the macro, along with an optional prefix.
+ 
+ =cut
+ 
+ use strict;
+ use warnings;
+ 
+ use Getopt::Long;
+ 
+ GetOptions(
+ 	'prefix=s'  => \my $opt_prefix,
+ 	'name=s'    => \my $opt_name,
+ 	'strip=s'   => \my $opt_strip,
+ 	'selftest!' => sub { exit selftest() },
+ ) or exit 1;
+ 
+ die "No text files specified"
+ 	unless @ARGV;
+ 
+ print qq{
+ /*
+  * DO NOT EDIT - THIS FILE IS AUTOGENERATED - CHANGES WILL BE LOST
+  * Written by $0 from @ARGV
+  */
+ };
+ 
+ for my $src_file (@ARGV) {
+ 
+ 	(my $macro = $src_file) =~ s/ .*? (\w+) (?:\.\w+) $/$1/x;
+ 
+ 	open my $src_fh, $src_file # not 3-arg form
+ 		or die "Can't open $src_file: $!";
+ 
+ 	printf qq{#define %s%s \\\n},
+ 		$opt_prefix || '',
+ 		($opt_name) ? $opt_name : uc $macro;
+ 	while (<$src_fh>) {
+ 		chomp;
+ 
+ 		next if $opt_strip and m/$opt_strip/o;
+ 
+ 		# escape the text to suite C string literal rules
+ 		s/\\/\\\\/g;
+ 		s/"/\\"/g;
+ 
+ 		printf qq{"%s\\n" \\\n}, $_;
+ 	}
+ 	print qq{""\n\n};
+ }
+ 
+ print "/* end */\n";
+ 
+ exit 0;
+ 
+ 
+ sub selftest {
+ 	my $tmp = "text2macro_tmp";
+ 	my $string = q{a '' '\\'' "" "\\"" "\\\\" "\\\\n" b};
+ 
+ 	open my $fh, ">$tmp.pl" or die;
+ 	print $fh $string;
+ 	close $fh;
+ 
+ 	system("perl $0 --name=X $tmp.pl > $tmp.c") == 0 or die;
+ 	open $fh, ">>$tmp.c";
+ 	print $fh "#include <stdio.h>\n";
+ 	print $fh "int main() { puts(X); return 0; }\n";
+ 	close $fh;
+ 	system("cat -n $tmp.c");
+ 	
+ 	system("make $tmp") == 0 or die;
+ 	open $fh, "./$tmp |" or die;
+ 	my $result = <$fh>;
+ 	unlink <$tmp.*>;
+ 
+ 	warn "Test string: $string\n";
+ 	warn "Result     : $result";
+ 	die "Failed!" if $result ne "$string\n";
+ }
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to