Re: plpython_unicode test (was Re: [HACKERS] buildfarm / handling (undefined) locales)

2014-06-02 Thread Tom Lane
Andrew Dunstan and...@dunslane.net writes:
 On 06/01/2014 05:35 PM, Tom Lane wrote:
 I did a little bit of experimentation and determined that none of the
 LATIN1 characters are significantly more portable than what we've got:
 for instance a-acute fails to convert into 16 of the 33 supported
 server-side encodings (versus 17 failures for U+0080).  However,
 non-breaking space is significantly better: it converts into all our
 supported server encodings except EUC_CN, EUC_JP, EUC_KR, EUC_TW.
 It seems likely that we won't do better than that except with a basic
 ASCII character.

 Yeah, I just looked at the copyright symbol, with similar results.

I'd been hopeful about that one too, but nope :-(

 Let's just stick to ASCII.

The more I think about it, the more I think that using a plain-ASCII
character would defeat most of the purpose of the test.  Non-breaking
space seems like the best bet here, not least because it has several
different representations among the encodings we support.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: plpython_unicode test (was Re: [HACKERS] buildfarm / handling (undefined) locales)

2014-06-02 Thread Tom Lane
I wrote:
 Andrew Dunstan and...@dunslane.net writes:
 Let's just stick to ASCII.

 The more I think about it, the more I think that using a plain-ASCII
 character would defeat most of the purpose of the test.  Non-breaking
 space seems like the best bet here, not least because it has several
 different representations among the encodings we support.

I've confirmed that a patch as attached behaves per expectation
(in particular, it passes with WIN1250 database encoding).

I think that the worry I expressed about UTF8 characters in expected-files
is probably overblown: we have such in collate.linux.utf8 test, and we've
not heard reports of that one breaking.  (Though of course, it's not run
by default :-(.)  It's still annoying that the test would fail in EUC_xx
encodings, but I see no way around that without largely lobotomizing the
test.

So I propose to apply this, and back-patch to 9.1 (not 9.0, because its
version of this test is different anyway --- so Tomas will have to drop
testing cs_CZ.WIN-1250 in 9.0).

regards, tom lane

diff --git a/src/pl/plpython/expected/plpython_unicode.out b/src/pl/plpython/expected/plpython_unicode.out
index 859edbb..c7546dd 100644
*** a/src/pl/plpython/expected/plpython_unicode.out
--- b/src/pl/plpython/expected/plpython_unicode.out
***
*** 1,22 
  --
  -- Unicode handling
  --
  SET client_encoding TO UTF8;
  CREATE TABLE unicode_test (
  	testvalue  text NOT NULL
  );
  CREATE FUNCTION unicode_return() RETURNS text AS E'
! return u\\x80
  ' LANGUAGE plpythonu;
  CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
! TD[new][testvalue] = u\\x80
  return MODIFY
  ' LANGUAGE plpythonu;
  CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
FOR EACH ROW EXECUTE PROCEDURE unicode_trigger();
  CREATE FUNCTION unicode_plan1() RETURNS text AS E'
  plan = plpy.prepare(SELECT $1 AS testvalue, [text])
! rv = plpy.execute(plan, [u\\x80], 1)
  return rv[0][testvalue]
  ' LANGUAGE plpythonu;
  CREATE FUNCTION unicode_plan2() RETURNS text AS E'
--- 1,27 
  --
  -- Unicode handling
  --
+ -- Note: this test case is known to fail if the database encoding is
+ -- EUC_CN, EUC_JP, EUC_KR, or EUC_TW, for lack of any equivalent to
+ -- U+00A0 (no-break space) in those encodings.  However, testing with
+ -- plain ASCII data would be rather useless, so we must live with that.
+ --
  SET client_encoding TO UTF8;
  CREATE TABLE unicode_test (
  	testvalue  text NOT NULL
  );
  CREATE FUNCTION unicode_return() RETURNS text AS E'
! return u\\xA0
  ' LANGUAGE plpythonu;
  CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
! TD[new][testvalue] = u\\xA0
  return MODIFY
  ' LANGUAGE plpythonu;
  CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
FOR EACH ROW EXECUTE PROCEDURE unicode_trigger();
  CREATE FUNCTION unicode_plan1() RETURNS text AS E'
  plan = plpy.prepare(SELECT $1 AS testvalue, [text])
! rv = plpy.execute(plan, [u\\xA0], 1)
  return rv[0][testvalue]
  ' LANGUAGE plpythonu;
  CREATE FUNCTION unicode_plan2() RETURNS text AS E'
*** return rv[0][testvalue]
*** 27,46 
  SELECT unicode_return();
   unicode_return 
  
!  \u0080
  (1 row)
  
  INSERT INTO unicode_test (testvalue) VALUES ('test');
  SELECT * FROM unicode_test;
   testvalue 
  ---
!  \u0080
  (1 row)
  
  SELECT unicode_plan1();
   unicode_plan1 
  ---
!  \u0080
  (1 row)
  
  SELECT unicode_plan2();
--- 32,51 
  SELECT unicode_return();
   unicode_return 
  
!   
  (1 row)
  
  INSERT INTO unicode_test (testvalue) VALUES ('test');
  SELECT * FROM unicode_test;
   testvalue 
  ---
!   
  (1 row)
  
  SELECT unicode_plan1();
   unicode_plan1 
  ---
!   
  (1 row)
  
  SELECT unicode_plan2();
diff --git a/src/pl/plpython/sql/plpython_unicode.sql b/src/pl/plpython/sql/plpython_unicode.sql
index bdd40c4..a11e5ee 100644
*** a/src/pl/plpython/sql/plpython_unicode.sql
--- b/src/pl/plpython/sql/plpython_unicode.sql
***
*** 1,6 
--- 1,11 
  --
  -- Unicode handling
  --
+ -- Note: this test case is known to fail if the database encoding is
+ -- EUC_CN, EUC_JP, EUC_KR, or EUC_TW, for lack of any equivalent to
+ -- U+00A0 (no-break space) in those encodings.  However, testing with
+ -- plain ASCII data would be rather useless, so we must live with that.
+ --
  
  SET client_encoding TO UTF8;
  
*** CREATE TABLE unicode_test (
*** 9,19 
  );
  
  CREATE FUNCTION unicode_return() RETURNS text AS E'
! return u\\x80
  ' LANGUAGE plpythonu;
  
  CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
! TD[new][testvalue] = u\\x80
  return MODIFY
  ' LANGUAGE plpythonu;
  
--- 14,24 
  );
  
  CREATE FUNCTION unicode_return() RETURNS text AS E'
! return u\\xA0
  ' LANGUAGE plpythonu;
  
  CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
! TD[new][testvalue] = u\\xA0
  return MODIFY
  ' LANGUAGE plpythonu;
  
*** 

plpython_unicode test (was Re: [HACKERS] buildfarm / handling (undefined) locales)

2014-06-01 Thread Tom Lane
Tomas Vondra t...@fuzzy.cz writes:
 On 13.5.2014 20:58, Tom Lane wrote:
 Tomas Vondra t...@fuzzy.cz writes:
 Yeah, not really what we were shooting for. I've fixed this by
 defining the missing locales, and indeed - magpie now fails in
 plpython tests.

 I saw that earlier today (tho right now the buildfarm server seems
 to not be responding :-().  Probably we should use some
 more-widely-used character code in that specific test?

 Any idea what other character could be used in those tests? ISTM fixing
 this universally would mean using ASCII characters - the subset of UTF-8
 common to all the encodings. But I'm afraid that'd contradict the very
 purpose of those tests ...

We really ought to resolve this issue so that we can get rid of some of
the red in the buildfarm.  ISTM there are three possible approaches:

1. Decide that we're not going to support running the plpython regression
tests under weird server encodings, in which case Tomas should just
remove cs_CZ.WIN-1250 from the set of encodings his buildfarm animals
test.  Don't much care for this, but it has the attraction of being
minimal work.

2. Change the plpython_unicode test to use some ASCII character in place
of \u0080.  We could keep on using the \u syntax to create the character,
but as stated above, this still seems like it's losing a significant
amount of test coverage.

3. Try to select some more portable non-ASCII character, perhaps U+00A0
(non breaking space) or U+00E1 (a-acute).  I think this would probably
work for most encodings but it might still fail in the Far East.  Another
objection is that the expected/plpython_unicode.out file would contain
that character in UTF8 form.  In principle that would work, since the test
sets client_encoding = utf8 explicitly, but I'm worried about accidental
corruption of the expected file by text editors, file transfers, etc.
(The current usage of U+0080 doesn't suffer from this risk because psql
special-cases printing of multibyte UTF8 control characters, so that we
get exactly \u0080.)

Thoughts?

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: plpython_unicode test (was Re: [HACKERS] buildfarm / handling (undefined) locales)

2014-06-01 Thread Tom Lane
I wrote:
 3. Try to select some more portable non-ASCII character, perhaps U+00A0
 (non breaking space) or U+00E1 (a-acute).  I think this would probably
 work for most encodings but it might still fail in the Far East.  Another
 objection is that the expected/plpython_unicode.out file would contain
 that character in UTF8 form.  In principle that would work, since the test
 sets client_encoding = utf8 explicitly, but I'm worried about accidental
 corruption of the expected file by text editors, file transfers, etc.
 (The current usage of U+0080 doesn't suffer from this risk because psql
 special-cases printing of multibyte UTF8 control characters, so that we
 get exactly \u0080.)

I did a little bit of experimentation and determined that none of the
LATIN1 characters are significantly more portable than what we've got:
for instance a-acute fails to convert into 16 of the 33 supported
server-side encodings (versus 17 failures for U+0080).  However,
non-breaking space is significantly better: it converts into all our
supported server encodings except EUC_CN, EUC_JP, EUC_KR, EUC_TW.
It seems likely that we won't do better than that except with a basic
ASCII character.

In principle we could make the test pass even in these encodings
by adding variant expected files, but I doubt it's worth it.  I'd
be inclined to just add a comment to the regression test file indicating
that that's a known failure case, and move on.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: plpython_unicode test (was Re: [HACKERS] buildfarm / handling (undefined) locales)

2014-06-01 Thread Andrew Dunstan


On 06/01/2014 05:35 PM, Tom Lane wrote:

I wrote:

3. Try to select some more portable non-ASCII character, perhaps U+00A0
(non breaking space) or U+00E1 (a-acute).  I think this would probably
work for most encodings but it might still fail in the Far East.  Another
objection is that the expected/plpython_unicode.out file would contain
that character in UTF8 form.  In principle that would work, since the test
sets client_encoding = utf8 explicitly, but I'm worried about accidental
corruption of the expected file by text editors, file transfers, etc.
(The current usage of U+0080 doesn't suffer from this risk because psql
special-cases printing of multibyte UTF8 control characters, so that we
get exactly \u0080.)

I did a little bit of experimentation and determined that none of the
LATIN1 characters are significantly more portable than what we've got:
for instance a-acute fails to convert into 16 of the 33 supported
server-side encodings (versus 17 failures for U+0080).  However,
non-breaking space is significantly better: it converts into all our
supported server encodings except EUC_CN, EUC_JP, EUC_KR, EUC_TW.
It seems likely that we won't do better than that except with a basic
ASCII character.



Yeah, I just looked at the copyright symbol, with similar results.

Let's just stick to ASCII.

cheers

andrew



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-24 Thread Tomas Vondra
On 13.5.2014 20:58, Tom Lane wrote:
 Tomas Vondra t...@fuzzy.cz writes:

 Yeah, not really what we were shooting for. I've fixed this by
 defining the missing locales, and indeed - magpie now fails in
 plpython tests.
 
 I saw that earlier today (tho right now the buildfarm server seems
 to not be responding :-().  Probably we should use some
 more-widely-used character code in that specific test?

Any idea what other character could be used in those tests? ISTM fixing
this universally would mean using ASCII characters - the subset of UTF-8
common to all the encodings. But I'm afraid that'd contradict the very
purpose of those tests ...

Tomas


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-14 Thread Heikki Linnakangas

On 05/14/2014 12:07 AM, Tom Lane wrote:

Andrew Dunstan and...@dunslane.net writes:

On 05/13/2014 04:14 PM, Tom Lane wrote:

But independently of whether it's a fatal error or not: when there's
no relevant command-line argument then we print the

invalid locale name 

message which is surely pretty unhelpful.  It'd be better if we could
finger the incorrect environment setting.  Unfortunately, we don't know
for sure which environment variable(s) setlocale was looking at --- I
believe it's somewhat platform specific.  We could probably print
something like this instead:

environment locale settings are invalid

Thoughts?



I'd also be tempted to add the settings for LC_ALL and LANG and note
that they are possible sources of the problem, or maybe only do that if
they match the locale being rejected.


Well, all we know is that we asked setlocale for the default locale from
the environment and it failed.  We don't really know which variable(s)
it looked at.  I think printing specific variables could easily be as
misleading as it is helpful; and given the lack of prior complaints,
I'm doubtful that it's worth going to the effort of trying to print a
platform-specific collection of variables.


Printing the values of the environment variables sounds complicated, but 
I think a generic hint along these lines would be good:


$ LANG=foo initdb
The files belonging to this database system will be owned by user 
postgres.

This user must also own the server process.

initdb: environment locale settings are invalid
HINT: This usually means that the LANG, LC_ALL or LC_* environment 
variable has an invalid value.


Specifically mentioning those environment variables by name would be 
very helpful to the user. They are specified by POSIX: 
http://pubs.opengroup.org/onlinepubs/7908799/xbd/envvar.html, so they're 
not that platform-specific. Windows is a different story, though.


- Heikki


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-14 Thread Tom Lane
Heikki Linnakangas hlinnakan...@vmware.com writes:
 On 05/14/2014 12:07 AM, Tom Lane wrote:
 Well, all we know is that we asked setlocale for the default locale from
 the environment and it failed.  We don't really know which variable(s)
 it looked at.

 Printing the values of the environment variables sounds complicated, but 
 I think a generic hint along these lines would be good:

 initdb: environment locale settings are invalid
 HINT: This usually means that the LANG, LC_ALL or LC_* environment 
 variable has an invalid value.

Hm.  Keep in mind this is not backend code so we don't have a HINT
formalism.  How about

initdb: invalid locale settings; check LANG and LC_* environment variables

 Specifically mentioning those environment variables by name would be 
 very helpful to the user. They are specified by POSIX: 
 http://pubs.opengroup.org/onlinepubs/7908799/xbd/envvar.html, so they're 
 not that platform-specific. Windows is a different story, though.

I seem to recall having heard that some implementations check LANGUAGE
and perhaps other spellings as well.  Still, this might be better than
assuming the user has a clue what to check.

As far as Windows goes, we could certainly use #ifdef WIN32 to print
some different text, if anyone can write down what it should be.

Anyway, given that we seem to have consensus on doing this (modulo
exact text of the error message), the next question is whether we
want to change this only in HEAD, or consider it a back-patchable
bug fix.  I lean to the former but can see that there's some
argument for the latter.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-14 Thread Heikki Linnakangas

On 05/14/2014 05:19 PM, Tom Lane wrote:

Heikki Linnakangas hlinnakan...@vmware.com writes:

On 05/14/2014 12:07 AM, Tom Lane wrote:

Well, all we know is that we asked setlocale for the default locale from
the environment and it failed.  We don't really know which variable(s)
it looked at.



Printing the values of the environment variables sounds complicated, but
I think a generic hint along these lines would be good:



initdb: environment locale settings are invalid
HINT: This usually means that the LANG, LC_ALL or LC_* environment
variable has an invalid value.


Hm.  Keep in mind this is not backend code so we don't have a HINT
formalism.  How about

initdb: invalid locale settings; check LANG and LC_* environment variables


Works for me. I note that we have used the HINT: wording in pg_ctl, too:

pg_ctl: server does not shut down
HINT: The -m fast option immediately disconnects sessions rather than
waiting for session-initiated disconnection.

That's the only client program that does that, though, so perhaps we 
want to reconsider that in pg_ctl.



Anyway, given that we seem to have consensus on doing this (modulo
exact text of the error message), the next question is whether we
want to change this only in HEAD, or consider it a back-patchable
bug fix.  I lean to the former but can see that there's some
argument for the latter.


HEAD-only. There might be scripts out there that do initdb with invalid 
LANG. They might be perfectly happy with getting the C locale, and 
backpatching this would make them unhappy.


- Heikki


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-14 Thread Tomas Vondra
On 14 Květen 2014, 16:26, Heikki Linnakangas wrote:
 On 05/14/2014 05:19 PM, Tom Lane wrote:

 Anyway, given that we seem to have consensus on doing this (modulo
 exact text of the error message), the next question is whether we
 want to change this only in HEAD, or consider it a back-patchable
 bug fix.  I lean to the former but can see that there's some
 argument for the latter.

 HEAD-only. There might be scripts out there that do initdb with invalid
 LANG. They might be perfectly happy with getting the C locale, and
 backpatching this would make them unhappy.

Not sure if 'happy with broken config' is the right argument here, but it
certainly is a behaviour change / not a bugfix, so +1 for HEAD-only from
me too.

Tomas



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-14 Thread Tom Lane
I wrote:
 As far as Windows goes, we could certainly use #ifdef WIN32 to print
 some different text, if anyone can write down what it should be.

Some googling suggests that on Windows, setlocale pays no attention
to environment variables but gets a value that the user can set via
the Control Panel.  This probably means that an invalid setting is
impossible, or at least that we have no clear idea how the user could
screw it up.  So I'm thinking we do not need a Windows-specific
phrasing of the message, until such time as we see evidence of how
you could provoke this case on Windows.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-14 Thread Andrew Dunstan


On 05/14/2014 11:02 AM, Tom Lane wrote:

I wrote:

As far as Windows goes, we could certainly use #ifdef WIN32 to print
some different text, if anyone can write down what it should be.

Some googling suggests that on Windows, setlocale pays no attention
to environment variables but gets a value that the user can set via
the Control Panel.


Indeed. There is a comment in plperl.c about that, because we have to 
deal with it in a somewhat ugly fashion.




This probably means that an invalid setting is
impossible, or at least that we have no clear idea how the user could
screw it up.  So I'm thinking we do not need a Windows-specific
phrasing of the message, until such time as we see evidence of how
you could provoke this case on Windows.




Right. I'd just leave Windows out of it for now.

cheers

andrew



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-14 Thread Tomas Vondra
On 13.5.2014 23:07, Tom Lane wrote:
 Andrew Dunstan and...@dunslane.net writes:
 On 05/13/2014 04:14 PM, Tom Lane wrote:
 But independently of whether it's a fatal error or not: when there's
 no relevant command-line argument then we print the

 invalid locale name 

 message which is surely pretty unhelpful.  It'd be better if we could
 finger the incorrect environment setting.  Unfortunately, we don't know
 for sure which environment variable(s) setlocale was looking at --- I
 believe it's somewhat platform specific.  We could probably print
 something like this instead:

 environment locale settings are invalid

 Thoughts?
 
 I'd also be tempted to add the settings for LC_ALL and LANG and note 
 that they are possible sources of the problem, or maybe only do that if 
 they match the locale being rejected.
 
 Well, all we know is that we asked setlocale for the default locale from
 the environment and it failed.  We don't really know which variable(s)
 it looked at.  I think printing specific variables could easily be as
 misleading as it is helpful; and given the lack of prior complaints,
 I'm doubtful that it's worth going to the effort of trying to print a
 platform-specific collection of variables.
 
 Attached is a draft patch that behaves like this:
 
 $ initdb --locale=foo
 The files belonging to this database system will be owned by user postgres.
 This user must also own the server process.
 
 initdb: invalid locale name foo

I see the committed patch actually behaves like this:

  initdb: invalid locale name cs_CZ.WIN-1250
  The files belonging to this database system will be owned by user
  pgbuild.
  This user must also own the server process.

(at least that's what magpie logged at [1]). I find that confusing,
because initdb logs the actual problem and then two more lines so it
seems like nothing happened.

I think we should keep the message explaining the problem last, i.e. not
log the two additional messages (which are quite pointless), switch the
order of messages or add another one.

[1]
http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=magpiedt=2014-05-14%2019%3A20%3A25

Tomas


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-14 Thread Tom Lane
Tomas Vondra t...@fuzzy.cz writes:
 On 13.5.2014 23:07, Tom Lane wrote:
 Attached is a draft patch that behaves like this:
 
 $ initdb --locale=foo
 The files belonging to this database system will be owned by user postgres.
 This user must also own the server process.
 
 initdb: invalid locale name foo

 I see the committed patch actually behaves like this:

   initdb: invalid locale name cs_CZ.WIN-1250
   The files belonging to this database system will be owned by user
   pgbuild.
   This user must also own the server process.

 (at least that's what magpie logged at [1]). I find that confusing,

Interesting. I guess stdout isn't getting fflush'd soon enough when
it's going to a file.  We should probably mark it as line-buffered
so the behavior is consistent with the interactive case.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Tomas Vondra
On 13.5.2014 20:27, Tomas Vondra wrote:
 Hi all,
 
 a few days ago I switched magpie into an LXC container, and while
 fixinig unrelated issue there, I noticed that although the tests seemed
 to run, some of the results are actually rubbish because of missing locales.

I forgot to mention that I noticed another (possible) issue - ISTM the
buildfarm client is not packing all the log files. For for the failure
in REL9_3_STABLE, there are 32 log files packed:


http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=magpiedt=2014-05-13%2014%3A59%3A46

However in the lastrun-logs I see 81 files, and upon closer inspection
it seems that pretty much logs for all custom locales (cs_CZ.*, sk_SK.*)
are missing:

initdb-cs_CZ.ISO-8859-2
initdb-cs_CZ.UTF-8
initdb-cs_CZ.WIN-1250
initdb-sk_SK.ISO-8859-2
initdb-sk_SK.UTF-8
install-check-cs_CZ.ISO-8859-2
install-check-cs_CZ.UTF-8
install-check-cs_CZ.WIN-1250
install-check-sk_SK.ISO-8859-2
install-check-sk_SK.UTF-8
pl-install-check-cs_CZ.ISO-8859-2
pl-install-check-cs_CZ.UTF-8
pl-install-check-cs_CZ.WIN-1250
pl-install-check-sk_SK.ISO-8859-2
pl-install-check-sk_SK.UTF-8
startdb-cs_CZ.ISO-8859-2-1
startdb-cs_CZ.ISO-8859-2-2
startdb-cs_CZ.ISO-8859-2-3
startdb-cs_CZ.UTF-8-1
startdb-cs_CZ.UTF-8-2
startdb-cs_CZ.UTF-8-3
startdb-cs_CZ.WIN-1250-1
startdb-cs_CZ.WIN-1250-2
startdb-sk_SK.ISO-8859-2-1
startdb-sk_SK.ISO-8859-2-2
startdb-sk_SK.ISO-8859-2-3
startdb-sk_SK.UTF-8-1
startdb-sk_SK.UTF-8-2
startdb-sk_SK.UTF-8-3
stopdb-cs_CZ.ISO-8859-2-1
stopdb-cs_CZ.ISO-8859-2-2
stopdb-cs_CZ.ISO-8859-2-3
stopdb-cs_CZ.UTF-8-1
stopdb-cs_CZ.UTF-8-2
stopdb-cs_CZ.UTF-8-3
stopdb-cs_CZ.WIN-1250-1
stopdb-sk_SK.ISO-8859-2-1
stopdb-sk_SK.ISO-8859-2-2
stopdb-sk_SK.ISO-8859-2-3
stopdb-sk_SK.UTF-8-1
stopdb-sk_SK.UTF-8-2
stopdb-sk_SK.UTF-8-3

A few days back this was reported properly
(http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=magpiedt=2014-04-17%2000%3A07%3A36).
I doubt this is related to the LXC container, but I was running 4.9
version of the buildfarm client back then, while now it's running in
4.12. I'd guess this is the actual cause.

regards
Tomas


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Tom Lane
Tomas Vondra t...@fuzzy.cz writes:
 a few days ago I switched magpie into an LXC container, and while
 fixinig unrelated issue there, I noticed that although the tests seemed
 to run, some of the results are actually rubbish because of missing locales.

 So for example when the system misses cs_CZ.WIN-1250 (i.e. czech locale
 with windows codepage 1250), initdb actually did this

 The files belonging to this database system will be owned by user
 pgbuild.
 This user must also own the server process.

 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 The database cluster will be initialized with locale C.
 The default database encoding has accordingly been set to
 SQL_ASCII.
 The default text search configuration will be set to english.

Hm, I'm a bit confused as to what you actually did here.  The invalid
locale name bleats seem to indicate that no --locale or --lc_xxx options
were given on the command line; correct?  If so the issue is presumably
that the environment variable(s) were set to incorrect values.  While
we *could* abort in that situation, I've never heard of any program
that did; the normal response is to silently ignore the environment
variables and use C locale.  We're not being exactly silent about it
but I think the outcome is the expected one.

There is a comment in the code about this:

/* should we exit here? */
if (res == NULL)
fprintf(stderr, _(%s: invalid locale name \%s\\n),
progname, locale);

but I think what's being questioned is whether an incorrect locale
name *given as a command line argument* should result in an abort.
That might be a good idea, but it looks like it'd require some
restructuring of the code to make it possible to distinguish the
case from bad-environment.

 Yeah, not really what we were shooting for. I've fixed this by defining
 the missing locales, and indeed - magpie now fails in plpython tests.

I saw that earlier today (tho right now the buildfarm server seems to
not be responding :-().  Probably we should use some more-widely-used
character code in that specific test?

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Andrew Dunstan


On 05/13/2014 02:39 PM, Tomas Vondra wrote:

On 13.5.2014 20:27, Tomas Vondra wrote:

Hi all,

a few days ago I switched magpie into an LXC container, and while
fixinig unrelated issue there, I noticed that although the tests seemed
to run, some of the results are actually rubbish because of missing locales.

I forgot to mention that I noticed another (possible) issue - ISTM the
buildfarm client is not packing all the log files. For for the failure
in REL9_3_STABLE, there are 32 log files packed:


http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=magpiedt=2014-05-13%2014%3A59%3A46

However in the lastrun-logs I see 81 files, and upon closer inspection
it seems that pretty much logs for all custom locales (cs_CZ.*, sk_SK.*)
are missing:

 initdb-cs_CZ.ISO-8859-2
 initdb-cs_CZ.UTF-8
 initdb-cs_CZ.WIN-1250
 initdb-sk_SK.ISO-8859-2
 initdb-sk_SK.UTF-8
 install-check-cs_CZ.ISO-8859-2
 install-check-cs_CZ.UTF-8
 install-check-cs_CZ.WIN-1250
 install-check-sk_SK.ISO-8859-2
 install-check-sk_SK.UTF-8
 pl-install-check-cs_CZ.ISO-8859-2
 pl-install-check-cs_CZ.UTF-8
 pl-install-check-cs_CZ.WIN-1250
 pl-install-check-sk_SK.ISO-8859-2
 pl-install-check-sk_SK.UTF-8
 startdb-cs_CZ.ISO-8859-2-1
 startdb-cs_CZ.ISO-8859-2-2
 startdb-cs_CZ.ISO-8859-2-3
 startdb-cs_CZ.UTF-8-1
 startdb-cs_CZ.UTF-8-2
 startdb-cs_CZ.UTF-8-3
 startdb-cs_CZ.WIN-1250-1
 startdb-cs_CZ.WIN-1250-2
 startdb-sk_SK.ISO-8859-2-1
 startdb-sk_SK.ISO-8859-2-2
 startdb-sk_SK.ISO-8859-2-3
 startdb-sk_SK.UTF-8-1
 startdb-sk_SK.UTF-8-2
 startdb-sk_SK.UTF-8-3
 stopdb-cs_CZ.ISO-8859-2-1
 stopdb-cs_CZ.ISO-8859-2-2
 stopdb-cs_CZ.ISO-8859-2-3
 stopdb-cs_CZ.UTF-8-1
 stopdb-cs_CZ.UTF-8-2
 stopdb-cs_CZ.UTF-8-3
 stopdb-cs_CZ.WIN-1250-1
 stopdb-sk_SK.ISO-8859-2-1
 stopdb-sk_SK.ISO-8859-2-2
 stopdb-sk_SK.ISO-8859-2-3
 stopdb-sk_SK.UTF-8-1
 stopdb-sk_SK.UTF-8-2
 stopdb-sk_SK.UTF-8-3

A few days back this was reported properly
(http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=magpiedt=2014-04-17%2000%3A07%3A36).
I doubt this is related to the LXC container, but I was running 4.9
version of the buildfarm client back then, while now it's running in
4.12. I'd guess this is the actual cause.




Clearly this is from a different run, since the link above shows the run 
failing at the contrib-install-check-en_US stage, and so of course all 
those other locales would not have been checked in that run.


So there is no error here. If you want to check for an error compare the 
contents of lastrun-logs/*.log with the contents of runlogs.tgz.


cheers

andrew


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Tomas Vondra
On 13.5.2014 21:03, Andrew Dunstan wrote:
 
 On 05/13/2014 02:39 PM, Tomas Vondra wrote:
 On 13.5.2014 20:27, Tomas Vondra wrote:
 Hi all,

 a few days ago I switched magpie into an LXC container, and while
 fixinig unrelated issue there, I noticed that although the tests seemed
 to run, some of the results are actually rubbish because of missing
 locales.
 I forgot to mention that I noticed another (possible) issue - ISTM the
 buildfarm client is not packing all the log files. For for the failure
 in REL9_3_STABLE, there are 32 log files packed:


 http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=magpiedt=2014-05-13%2014%3A59%3A46


 However in the lastrun-logs I see 81 files, and upon closer inspection
 it seems that pretty much logs for all custom locales (cs_CZ.*, sk_SK.*)
 are missing:

  initdb-cs_CZ.ISO-8859-2
  initdb-cs_CZ.UTF-8
  initdb-cs_CZ.WIN-1250
  initdb-sk_SK.ISO-8859-2
  initdb-sk_SK.UTF-8
  install-check-cs_CZ.ISO-8859-2
  install-check-cs_CZ.UTF-8
  install-check-cs_CZ.WIN-1250
  install-check-sk_SK.ISO-8859-2
  install-check-sk_SK.UTF-8
  pl-install-check-cs_CZ.ISO-8859-2
  pl-install-check-cs_CZ.UTF-8
  pl-install-check-cs_CZ.WIN-1250
  pl-install-check-sk_SK.ISO-8859-2
  pl-install-check-sk_SK.UTF-8
  startdb-cs_CZ.ISO-8859-2-1
  startdb-cs_CZ.ISO-8859-2-2
  startdb-cs_CZ.ISO-8859-2-3
  startdb-cs_CZ.UTF-8-1
  startdb-cs_CZ.UTF-8-2
  startdb-cs_CZ.UTF-8-3
  startdb-cs_CZ.WIN-1250-1
  startdb-cs_CZ.WIN-1250-2
  startdb-sk_SK.ISO-8859-2-1
  startdb-sk_SK.ISO-8859-2-2
  startdb-sk_SK.ISO-8859-2-3
  startdb-sk_SK.UTF-8-1
  startdb-sk_SK.UTF-8-2
  startdb-sk_SK.UTF-8-3
  stopdb-cs_CZ.ISO-8859-2-1
  stopdb-cs_CZ.ISO-8859-2-2
  stopdb-cs_CZ.ISO-8859-2-3
  stopdb-cs_CZ.UTF-8-1
  stopdb-cs_CZ.UTF-8-2
  stopdb-cs_CZ.UTF-8-3
  stopdb-cs_CZ.WIN-1250-1
  stopdb-sk_SK.ISO-8859-2-1
  stopdb-sk_SK.ISO-8859-2-2
  stopdb-sk_SK.ISO-8859-2-3
  stopdb-sk_SK.UTF-8-1
  stopdb-sk_SK.UTF-8-2
  stopdb-sk_SK.UTF-8-3

 A few days back this was reported properly
 (http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=magpiedt=2014-04-17%2000%3A07%3A36).

 I doubt this is related to the LXC container, but I was running 4.9
 version of the buildfarm client back then, while now it's running in
 4.12. I'd guess this is the actual cause.

 
 
 Clearly this is from a different run, since the link above shows the run
 failing at the contrib-install-check-en_US stage, and so of course all
 those other locales would not have been checked in that run.

Um, no, I think this (the last run in 9.3 @ magpie):


http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=magpiedt=2014-05-13%2014%3A59%3A46

ends with this:

   Details for system magpie failure at stage PLCheck-cs_CZ.WIN-1250,
snapshot taken 2014-05-13 14:59:46

 So there is no error here. If you want to check for an error compare the
 contents of lastrun-logs/*.log with the contents of runlogs.tgz.

$ pwd
/var/buildfarm/magpie/build/REL9_3_STABLE/magpie.lastrun-logs

http://pastebin.com/xC33LjrE (tar -tzvf runlogs.tgz)
http://pastebin.com/HsLJmLzW (ls -l)


But maybe I'm missing something ...

Tomas


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Tom Lane
Tomas Vondra t...@fuzzy.cz writes:
 On 13.5.2014 21:03, Andrew Dunstan wrote:
 On 05/13/2014 02:39 PM, Tomas Vondra wrote:
 However in the lastrun-logs I see 81 files, and upon closer inspection
 it seems that pretty much logs for all custom locales (cs_CZ.*, sk_SK.*)
 are missing:

 Clearly this is from a different run, since the link above shows the run
 failing at the contrib-install-check-en_US stage, and so of course all
 those other locales would not have been checked in that run.

 Um, no, I think this (the last run in 9.3 @ magpie):

Yeah, Tomas is right: there's something wrong with the uploading of non-C
locale test logs as of the 4.12 buildfarm scripts.  Here's what the
buildfarm shows for dromedary's last run:

http://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=dromedarydt=2014-05-13%2019%3A06%3A12

You can see that it's configured to do C and en_US.UTF-8 locales.  A look
into the lastrun-logs directory shows that it did:

$ ls
SCM-checkout.logpl-install-check-en_US.UTF-8
check-pg_upgrade.logrunlogs.tgz
check.log   startdb-C-1.log
config.log  startdb-C-2.log
configure.log   startdb-C-3.log
contrib-install-check-C.log startdb-C-4.log
contrib-install-check-en_US.UTF-8   startdb-en_US.UTF-8-1
ecpg-check.log  startdb-en_US.UTF-8-2
githead.log startdb-en_US.UTF-8-3
initdb-C.logstopdb-C-1.log
initdb-en_US.UTF-8  stopdb-C-2.log
install-check-C.log stopdb-C-3.log
install-check-en_US.UTF-8   stopdb-C-4.log
install-contrib.log stopdb-en_US.UTF-8-1
isolation-check.log stopdb-en_US.UTF-8-2
make-contrib.logstopdb-en_US.UTF-8-3
make-install.logtest-decoding-check.log
make.logtypedefs.log
pl-install-check-C.log  web-txn.data

but not all those files got put into runlogs.tgz:

$ tar tvfz runlogs.tgz
-rw-r--r--  0 buildfarm staff  40 May 13 15:06 githead.log
-rw-r--r--  0 buildfarm staff 520 May 13 15:06 SCM-checkout.log
-rw-r--r--  0 buildfarm staff  319642 May 13 15:07 config.log
-rw-r--r--  0 buildfarm staff   16060 May 13 15:07 configure.log
-rw-r--r--  0 buildfarm staff  293997 May 13 15:12 make.log
-rw-r--r--  0 buildfarm staff 2757773 May 13 15:13 check.log
-rw-r--r--  0 buildfarm staff   60462 May 13 15:13 make-contrib.log
-rw-r--r--  0 buildfarm staff   39763 May 13 15:13 make-install.log
-rw-r--r--  0 buildfarm staff   29673 May 13 15:13 install-contrib.log
-rw-r--r--  0 buildfarm staff   87376 May 13 15:15 check-pg_upgrade.log
-rw-r--r--  0 buildfarm staff  181571 May 13 15:16 test-decoding-check.log
-rw-r--r--  0 buildfarm staff1455 May 13 15:16 initdb-C.log
-rw-r--r--  0 buildfarm staff 540 May 13 15:17 startdb-C-1.log
-rw-r--r--  0 buildfarm staff 2692169 May 13 15:17 install-check-C.log
-rw-r--r--  0 buildfarm staff 297 May 13 15:17 stopdb-C-1.log
-rw-r--r--  0 buildfarm staff 540 May 13 15:18 startdb-C-2.log
-rw-r--r--  0 buildfarm staff 1678033 May 13 15:19 isolation-check.log
-rw-r--r--  0 buildfarm staff 296 May 13 15:19 stopdb-C-2.log
-rw-r--r--  0 buildfarm staff 540 May 13 15:19 startdb-C-3.log
-rw-r--r--  0 buildfarm staff  204501 May 13 15:19 pl-install-check-C.log
-rw-r--r--  0 buildfarm staff 296 May 13 15:19 stopdb-C-3.log
-rw-r--r--  0 buildfarm staff 540 May 13 15:19 startdb-C-4.log
-rw-r--r--  0 buildfarm staff  886518 May 13 15:20 contrib-install-check-C.log
-rw-r--r--  0 buildfarm staff 297 May 13 15:20 stopdb-C-4.log
-rw-r--r--  0 buildfarm staff  132144 May 13 15:23 ecpg-check.log
-rw-r--r--  0 buildfarm staff   35834 May 13 15:25 typedefs.log

which doubtless explains why they're not available from the buildfarm
server.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Heikki Linnakangas

On 05/13/2014 09:58 PM, Tom Lane wrote:

Tomas Vondra t...@fuzzy.cz writes:

a few days ago I switched magpie into an LXC container, and while
fixinig unrelated issue there, I noticed that although the tests seemed
to run, some of the results are actually rubbish because of missing locales.



So for example when the system misses cs_CZ.WIN-1250 (i.e. czech locale
with windows codepage 1250), initdb actually did this



 The files belonging to this database system will be owned by user
 pgbuild.
 This user must also own the server process.



 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 The database cluster will be initialized with locale C.
 The default database encoding has accordingly been set to
 SQL_ASCII.
 The default text search configuration will be set to english.


Hm, I'm a bit confused as to what you actually did here.  The invalid
locale name bleats seem to indicate that no --locale or --lc_xxx options
were given on the command line; correct?  If so the issue is presumably
that the environment variable(s) were set to incorrect values.  While
we *could* abort in that situation, I've never heard of any program
that did; the normal response is to silently ignore the environment
variables and use C locale.  We're not being exactly silent about it
but I think the outcome is the expected one.


Initdb isn't like most programs. The locale given to initdb is memorized 
in the data directory, and if you later notice that it was wrong, you'll 
have to dump and reload. There is a strong argument for initdb to be 
more strict than, say, your average text editor.



There is a comment in the code about this:

 /* should we exit here? */
 if (res == NULL)
 fprintf(stderr, _(%s: invalid locale name \%s\\n),
 progname, locale);

but I think what's being questioned is whether an incorrect locale
name *given as a command line argument* should result in an abort.
That might be a good idea, but it looks like it'd require some
restructuring of the code to make it possible to distinguish the
case from bad-environment.


Yeah, I think we should definitely complain and exit if the locale given 
on the command-line is invalid.


- Heikki


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Tomas Vondra
On 13.5.2014 20:58, Tom Lane wrote:
 Tomas Vondra t...@fuzzy.cz writes:
 a few days ago I switched magpie into an LXC container, and while
 fixinig unrelated issue there, I noticed that although the tests seemed
 to run, some of the results are actually rubbish because of missing locales.
 
 So for example when the system misses cs_CZ.WIN-1250 (i.e. czech locale
 with windows codepage 1250), initdb actually did this
 
 The files belonging to this database system will be owned by user
 pgbuild.
 This user must also own the server process.
 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 initdb: invalid locale name 
 The database cluster will be initialized with locale C.
 The default database encoding has accordingly been set to
 SQL_ASCII.
 The default text search configuration will be set to english.
 
 Hm, I'm a bit confused as to what you actually did here.  The invalid
 locale name bleats seem to indicate that no --locale or --lc_xxx options
 were given on the command line; correct?  If so the issue is presumably
 that the environment variable(s) were set to incorrect values.  While
 we *could* abort in that situation, I've never heard of any program
 that did; the normal response is to silently ignore the environment
 variables and use C locale.  We're not being exactly silent about it
 but I think the outcome is the expected one.
 
 There is a comment in the code about this:
 
 /* should we exit here? */
 if (res == NULL)
 fprintf(stderr, _(%s: invalid locale name \%s\\n),
 progname, locale);
 
 but I think what's being questioned is whether an incorrect locale
 name *given as a command line argument* should result in an abort.
 That might be a good idea, but it looks like it'd require some
 restructuring of the code to make it possible to distinguish the
 case from bad-environment.

H. Actually the logs above were generated manually by setting LANG
to an incorrect value on my desktop machine, i.e. something like this:

   export LANG=cs_CZ.WIN-1250
   pg_ctl -D /tmp/test init

I did this because the logs from the buildfarm animal contain the
messages in czech, which makes them rather unsuitable for english speakers:


http://www.pgbuildfarm.org/cgi-bin/show_stage_log.pl?nm=magpiedt=2014-04-17%2000%3A07%3A36stg=initdb-cs_CZ.WIN-1250

I suppose that's why the locale name is empty (it simply gets empty
string from the environment), while the buildfarm client uses a
command-line option to pass the name.

Also, I now noticed that while the example I posted has to C.SQL_ASCII,
the buildfarm then uses cs_CZ.UTF-8. I guess this possible thanks to the
'--locale' option.

If it was up to me, I'd vote to fail in such cases. I find it confusing
(and a bit 'automagic') to receive invalid locale but decide 'the user
surely meant C with SQL_ASCII'. I've actually had to deal with a few
production installations that were installed like this, and it was PITA
to fix.

Anyway, that's not what I was proposing here. I merely think we should
do a simple 'did we actually get the right locale' check somewhere in
the buildfarm client.

 Yeah, not really what we were shooting for. I've fixed this by defining
 the missing locales, and indeed - magpie now fails in plpython tests.
 
 I saw that earlier today (tho right now the buildfarm server seems to
 not be responding :-().  Probably we should use some more-widely-used
 character code in that specific test?

What do you mean by 'not responding'? The tests were not running for ~2
days because of LXC/SELinux issue, causing cron failures.

Anyway, using a more-widely used character is probably the best fix
possible here.

regards
Tomas


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Tom Lane
Heikki Linnakangas hlinnakan...@vmware.com writes:
 On 05/13/2014 09:58 PM, Tom Lane wrote:
 ... If so the issue is presumably
 that the environment variable(s) were set to incorrect values.  While
 we *could* abort in that situation, I've never heard of any program
 that did; the normal response is to silently ignore the environment
 variables and use C locale.  We're not being exactly silent about it
 but I think the outcome is the expected one.

 Initdb isn't like most programs. The locale given to initdb is memorized 
 in the data directory, and if you later notice that it was wrong, you'll 
 have to dump and reload. There is a strong argument for initdb to be 
 more strict than, say, your average text editor.

Hm, well, if that's the behavior we want then it's certainly an easy
change.

But independently of whether it's a fatal error or not: when there's
no relevant command-line argument then we print the

invalid locale name 

message which is surely pretty unhelpful.  It'd be better if we could
finger the incorrect environment setting.  Unfortunately, we don't know
for sure which environment variable(s) setlocale was looking at --- I
believe it's somewhat platform specific.  We could probably print
something like this instead:

environment locale settings are invalid

Thoughts?

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Andrew Dunstan


On 05/13/2014 03:40 PM, Tom Lane wrote:

Tomas Vondra t...@fuzzy.cz writes:

On 13.5.2014 21:03, Andrew Dunstan wrote:

On 05/13/2014 02:39 PM, Tomas Vondra wrote:

However in the lastrun-logs I see 81 files, and upon closer inspection
it seems that pretty much logs for all custom locales (cs_CZ.*, sk_SK.*)
are missing:

Clearly this is from a different run, since the link above shows the run
failing at the contrib-install-check-en_US stage, and so of course all
those other locales would not have been checked in that run.

Um, no, I think this (the last run in 9.3 @ magpie):

Yeah, Tomas is right: there's something wrong with the uploading of non-C
locale test logs as of the 4.12 buildfarm scripts.  Here's what the
buildfarm shows for dromedary's last run:

http://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=dromedarydt=2014-05-13%2019%3A06%3A12

You can see that it's configured to do C and en_US.UTF-8 locales.  A look
into the lastrun-logs directory shows that it did:

$ ls
SCM-checkout.logpl-install-check-en_US.UTF-8
check-pg_upgrade.logrunlogs.tgz
check.log   startdb-C-1.log
config.log  startdb-C-2.log
configure.log   startdb-C-3.log
contrib-install-check-C.log startdb-C-4.log
contrib-install-check-en_US.UTF-8   startdb-en_US.UTF-8-1
ecpg-check.log  startdb-en_US.UTF-8-2
githead.log startdb-en_US.UTF-8-3
initdb-C.logstopdb-C-1.log
initdb-en_US.UTF-8  stopdb-C-2.log
install-check-C.log stopdb-C-3.log
install-check-en_US.UTF-8   stopdb-C-4.log
install-contrib.log stopdb-en_US.UTF-8-1
isolation-check.log stopdb-en_US.UTF-8-2
make-contrib.logstopdb-en_US.UTF-8-3
make-install.logtest-decoding-check.log
make.logtypedefs.log
pl-install-check-C.log  web-txn.data

but not all those files got put into runlogs.tgz:

$ tar tvfz runlogs.tgz
-rw-r--r--  0 buildfarm staff  40 May 13 15:06 githead.log
-rw-r--r--  0 buildfarm staff 520 May 13 15:06 SCM-checkout.log
-rw-r--r--  0 buildfarm staff  319642 May 13 15:07 config.log
-rw-r--r--  0 buildfarm staff   16060 May 13 15:07 configure.log
-rw-r--r--  0 buildfarm staff  293997 May 13 15:12 make.log
-rw-r--r--  0 buildfarm staff 2757773 May 13 15:13 check.log
-rw-r--r--  0 buildfarm staff   60462 May 13 15:13 make-contrib.log
-rw-r--r--  0 buildfarm staff   39763 May 13 15:13 make-install.log
-rw-r--r--  0 buildfarm staff   29673 May 13 15:13 install-contrib.log
-rw-r--r--  0 buildfarm staff   87376 May 13 15:15 check-pg_upgrade.log
-rw-r--r--  0 buildfarm staff  181571 May 13 15:16 test-decoding-check.log
-rw-r--r--  0 buildfarm staff1455 May 13 15:16 initdb-C.log
-rw-r--r--  0 buildfarm staff 540 May 13 15:17 startdb-C-1.log
-rw-r--r--  0 buildfarm staff 2692169 May 13 15:17 install-check-C.log
-rw-r--r--  0 buildfarm staff 297 May 13 15:17 stopdb-C-1.log
-rw-r--r--  0 buildfarm staff 540 May 13 15:18 startdb-C-2.log
-rw-r--r--  0 buildfarm staff 1678033 May 13 15:19 isolation-check.log
-rw-r--r--  0 buildfarm staff 296 May 13 15:19 stopdb-C-2.log
-rw-r--r--  0 buildfarm staff 540 May 13 15:19 startdb-C-3.log
-rw-r--r--  0 buildfarm staff  204501 May 13 15:19 pl-install-check-C.log
-rw-r--r--  0 buildfarm staff 296 May 13 15:19 stopdb-C-3.log
-rw-r--r--  0 buildfarm staff 540 May 13 15:19 startdb-C-4.log
-rw-r--r--  0 buildfarm staff  886518 May 13 15:20 contrib-install-check-C.log
-rw-r--r--  0 buildfarm staff 297 May 13 15:20 stopdb-C-4.log
-rw-r--r--  0 buildfarm staff  132144 May 13 15:23 ecpg-check.log
-rw-r--r--  0 buildfarm staff   35834 May 13 15:25 typedefs.log

which doubtless explains why they're not available from the buildfarm
server.






There's a recently introduced bug, which I'm at a bit of a loss to 
explain. I have pushed a fix for it. 
https://github.com/PGBuildFarm/client-code/commit/0948ac741d6b2c6eeb8b46632db7ec25e24161d9


I will get out a bug fix release shortly.

cheers

andrew






--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Christoph Berg
Re: Tom Lane 2014-05-13 27525.1400012...@sss.pgh.pa.us
 Heikki Linnakangas hlinnakan...@vmware.com writes:
  On 05/13/2014 09:58 PM, Tom Lane wrote:
  ... If so the issue is presumably
  that the environment variable(s) were set to incorrect values.  While
  we *could* abort in that situation, I've never heard of any program
  that did; the normal response is to silently ignore the environment
  variables and use C locale.  We're not being exactly silent about it
  but I think the outcome is the expected one.
 
  Initdb isn't like most programs. The locale given to initdb is memorized 
  in the data directory, and if you later notice that it was wrong, you'll 
  have to dump and reload. There is a strong argument for initdb to be 
  more strict than, say, your average text editor.
 
 Hm, well, if that's the behavior we want then it's certainly an easy
 change.

It should definitely fail. If you have some LC_ variables set, you
want to store that charset in your database. If the DB ends up using
C, that's not helpful. (Or probably even worse, as SQL_ASCII will
accept binary garbage without checking anything, so you'll only notice
when it's too late.)

Bad locales are the #1 reason for initdb problems at install time for
Debian packages - while pg_createcluster catches some of these itself
before invoking initdb, making the process more deterministic would be
a good thing.

 But independently of whether it's a fatal error or not: when there's
 no relevant command-line argument then we print the
 
   invalid locale name 
 
 message which is surely pretty unhelpful.  It'd be better if we could
 finger the incorrect environment setting.  Unfortunately, we don't know
 for sure which environment variable(s) setlocale was looking at --- I
 believe it's somewhat platform specific.  We could probably print
 something like this instead:
 
   environment locale settings are invalid

Definitely a good plan. The current behavior is just not helpful:

$ LANG=de_DE.utf-9 /usr/lib/postgresql/9.4/bin/initdb -D /tmp/bar
The files belonging to this database system will be owned by user cbe.
This user must also own the server process.

initdb: invalid locale name 
initdb: invalid locale name 
initdb: invalid locale name 
initdb: invalid locale name 
initdb: invalid locale name 
initdb: invalid locale name 
The database cluster will be initialized with locale C.
The default database encoding has accordingly been set to SQL_ASCII.
The default text search configuration will be set to english.

Christoph
-- 
c...@df7cb.de | http://www.df7cb.de/


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Andrew Dunstan


On 05/13/2014 04:14 PM, Tom Lane wrote:

Heikki Linnakangas hlinnakan...@vmware.com writes:

On 05/13/2014 09:58 PM, Tom Lane wrote:

... If so the issue is presumably
that the environment variable(s) were set to incorrect values.  While
we *could* abort in that situation, I've never heard of any program
that did; the normal response is to silently ignore the environment
variables and use C locale.  We're not being exactly silent about it
but I think the outcome is the expected one.

Initdb isn't like most programs. The locale given to initdb is memorized
in the data directory, and if you later notice that it was wrong, you'll
have to dump and reload. There is a strong argument for initdb to be
more strict than, say, your average text editor.

Hm, well, if that's the behavior we want then it's certainly an easy
change.

But independently of whether it's a fatal error or not: when there's
no relevant command-line argument then we print the

invalid locale name 

message which is surely pretty unhelpful.  It'd be better if we could
finger the incorrect environment setting.  Unfortunately, we don't know
for sure which environment variable(s) setlocale was looking at --- I
believe it's somewhat platform specific.  We could probably print
something like this instead:

environment locale settings are invalid

Thoughts?






I'd also be tempted to add the settings for LC_ALL and LANG and note 
that they are possible sources of the problem, or maybe only do that if 
they match the locale being rejected.


cheers

andrew


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] buildfarm / handling (undefined) locales

2014-05-13 Thread Tom Lane
Andrew Dunstan and...@dunslane.net writes:
 On 05/13/2014 04:14 PM, Tom Lane wrote:
 But independently of whether it's a fatal error or not: when there's
 no relevant command-line argument then we print the
 
 invalid locale name 
 
 message which is surely pretty unhelpful.  It'd be better if we could
 finger the incorrect environment setting.  Unfortunately, we don't know
 for sure which environment variable(s) setlocale was looking at --- I
 believe it's somewhat platform specific.  We could probably print
 something like this instead:
 
 environment locale settings are invalid
 
 Thoughts?

 I'd also be tempted to add the settings for LC_ALL and LANG and note 
 that they are possible sources of the problem, or maybe only do that if 
 they match the locale being rejected.

Well, all we know is that we asked setlocale for the default locale from
the environment and it failed.  We don't really know which variable(s)
it looked at.  I think printing specific variables could easily be as
misleading as it is helpful; and given the lack of prior complaints,
I'm doubtful that it's worth going to the effort of trying to print a
platform-specific collection of variables.

Attached is a draft patch that behaves like this:

$ initdb --locale=foo
The files belonging to this database system will be owned by user postgres.
This user must also own the server process.

initdb: invalid locale name foo

$ LANG=foo initdb
The files belonging to this database system will be owned by user postgres.
This user must also own the server process.

initdb: environment locale settings are invalid

Most of the bulk of the patch is getting rid of the no-longer-useful
boolean result of check_locale_name.

regards, tom lane

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 2a51916..3a9dc53 100644
*** a/src/bin/initdb/initdb.c
--- b/src/bin/initdb/initdb.c
*** static void trapsig(int signum);
*** 252,258 
  static void check_ok(void);
  static char *escape_quotes(const char *src);
  static int	locale_date_order(const char *locale);
! static bool check_locale_name(int category, const char *locale,
    char **canonname);
  static bool check_locale_encoding(const char *locale, int encoding);
  static void setlocales(void);
--- 252,258 
  static void check_ok(void);
  static char *escape_quotes(const char *src);
  static int	locale_date_order(const char *locale);
! static void check_locale_name(int category, const char *locale,
    char **canonname);
  static bool check_locale_encoding(const char *locale, int encoding);
  static void setlocales(void);
*** locale_date_order(const char *locale)
*** 2529,2535 
  }
  
  /*
!  * Is the locale name valid for the locale category?
   *
   * If successful, and canonname isn't NULL, a malloc'd copy of the locale's
   * canonical name is stored there.  This is especially useful for figuring out
--- 2529,2535 
  }
  
  /*
!  * Verify that locale name is valid for the locale category.
   *
   * If successful, and canonname isn't NULL, a malloc'd copy of the locale's
   * canonical name is stored there.  This is especially useful for figuring out
*** locale_date_order(const char *locale)
*** 2540,2546 
   *
   * this should match the backend's check_locale() function
   */
! static bool
  check_locale_name(int category, const char *locale, char **canonname)
  {
  	char	   *save;
--- 2540,2546 
   *
   * this should match the backend's check_locale() function
   */
! static void
  check_locale_name(int category, const char *locale, char **canonname)
  {
  	char	   *save;
*** check_locale_name(int category, const ch
*** 2551,2557 
  
  	save = setlocale(category, NULL);
  	if (!save)
! 		return false;			/* won't happen, we hope */
  
  	/* save may be pointing at a modifiable scratch variable, so copy it. */
  	save = pg_strdup(save);
--- 2551,2561 
  
  	save = setlocale(category, NULL);
  	if (!save)
! 	{
! 		fprintf(stderr, _(%s: setlocale failed\n),
! progname);
! 		exit(1);
! 	}
  
  	/* save may be pointing at a modifiable scratch variable, so copy it. */
  	save = pg_strdup(save);
*** check_locale_name(int category, const ch
*** 2565,2580 
  
  	/* restore old value. */
  	if (!setlocale(category, save))
  		fprintf(stderr, _(%s: failed to restore old locale \%s\\n),
  progname, save);
  	free(save);
  
! 	/* should we exit here? */
  	if (res == NULL)
! 		fprintf(stderr, _(%s: invalid locale name \%s\\n),
! progname, locale);
! 
! 	return (res != NULL);
  }
  
  /*
--- 2569,2602 
  
  	/* restore old value. */
  	if (!setlocale(category, save))
+ 	{
  		fprintf(stderr, _(%s: failed to restore old locale \%s\\n),
  progname, save);
+ 		exit(1);
+ 	}
  	free(save);
  
! 	/* complain if locale wasn't valid */
  	if (res == NULL)
! 	{
! 		if (*locale)
! 			fprintf(stderr, _(%s: invalid locale name \%s\\n),
! 	progname, locale);
!