On Friday 22 May 2009 18:27:01 Konstantin Izmailov wrote:
> 3. character_octet_length should always be double of
> character_maximum_length (due to Unicode character size on Windows which is
> 2).
I have the attached patch that would make character_octet_length the product
of character_maximum_length and the maximum octet length of a single character
in the selected server encoding. So for UTF-8, this would be factor 4. This
doesn't exactly correspond to the behavior that you expect, but I think it's
more correct overall anyway.
diff --git a/doc/src/sgml/information_schema.sgml b/doc/src/sgml/information_schema.sgml
index 8e145d7..6460862 100644
--- a/doc/src/sgml/information_schema.sgml
+++ b/doc/src/sgml/information_schema.sgml
@@ -343,10 +343,10 @@
<entry><type>cardinal_number</type></entry>
<entry>
If <literal>data_type</literal> identifies a character type,
- the maximum possible length in octets (bytes) of a datum (this
- should not be of concern to
- <productname>PostgreSQL</productname> users); null for all
- other data types.
+ the maximum possible length in octets (bytes) of a datum; null
+ for all other data types. The maximum octet length depends on
+ the declared character maximum length (see above) and the
+ server encoding.
</entry>
</row>
@@ -947,9 +947,10 @@
<entry><type>cardinal_number</type></entry>
<entry>
If <literal>data_type</literal> identifies a character type,
- the maximum possible length in octets (bytes) of a datum (this
- should not be of concern to <productname>PostgreSQL</productname> users); null for all
- other data types.
+ the maximum possible length in octets (bytes) of a datum; null
+ for all other data types. The maximum octet length depends on
+ the declared character maximum length (see above) and the
+ server encoding.
</entry>
</row>
@@ -1688,9 +1689,9 @@
<entry><type>cardinal_number</type></entry>
<entry>
If the domain has a character type, the maximum possible length
- in octets (bytes) of a datum (this should not be of concern to
- <productname>PostgreSQL</productname> users); null for all
- other data types.
+ in octets (bytes) of a datum; null for all other data types.
+ The maximum octet length depends on the declared character
+ maximum length (see above) and the server encoding.
</entry>
</row>
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index fe75322..cd6258b 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -102,11 +102,20 @@ CREATE FUNCTION _pg_char_octet_length(typid oid, typmod int4) RETURNS integer
IMMUTABLE
RETURNS NULL ON NULL INPUT
AS
-$$SELECT
- CASE WHEN $1 IN (25, 1042, 1043) /* text, char, varchar */
- THEN CAST(2^30 AS integer)
- ELSE null
- END$$;
+$$SELECT
+  /* Maximum size in bytes of a character-type value, null for other types. */
+  CASE WHEN $1 IN (25, 1042, 1043) /* text, char, varchar */
+       THEN CASE WHEN $2 = -1 /* default typmod, i.e. no declared length */
+                 THEN CAST(2^30 AS integer)
+                 /* declared length in characters times the byte width of
+                    the widest character in the current database encoding */
+                 ELSE information_schema._pg_char_max_length($1, $2) *
+                      pg_catalog.pg_encoding_max_length(
+                          (SELECT encoding FROM pg_catalog.pg_database
+                           WHERE datname = pg_catalog.current_database()))
+            END
+       ELSE null
+  END$$;
CREATE FUNCTION _pg_numeric_precision(typid oid, typmod int4) RETURNS integer
LANGUAGE sql
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 753c927..058493c 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -482,6 +482,22 @@ length_in_encoding(PG_FUNCTION_ARGS)
}
+/*
+ * Get the maximum length in bytes of a single character in the given
+ * encoding.  The encoding is identified by its numeric id; an id that
+ * is not a valid encoding yields SQL NULL.
+ */
+Datum
+pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
+{
+	int			encoding = PG_GETARG_INT32(0);
+
+	if (PG_VALID_ENCODING(encoding))
+		PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
+	else
+		PG_RETURN_NULL();
+}
+
/*
* convert client encoding to server encoding.
*/
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 0285acd..e194d6a 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2278,6 +2278,9 @@ DESCR("convert encoding name to encoding id");
DATA(insert OID = 1597 ( pg_encoding_to_char PGNSP PGUID 12 1 0 0 f f f t f s 1 0 19 "23" _null_ _null_ _null_ _null_ PG_encoding_to_char _null_ _null_ _null_ ));
DESCR("convert encoding id to encoding name");
+DATA(insert OID = 2319 ( pg_encoding_max_length PGNSP PGUID 12 1 0 0 f f f t f i 1 0 23 "23" _null_ _null_ _null_ _null_ pg_encoding_max_length_sql _null_ _null_ _null_ ));
+DESCR("maximum octet length of a character in an encoding");
+
DATA(insert OID = 1638 ( oidgt PGNSP PGUID 12 1 0 0 f f f t f i 2 0 16 "26 26" _null_ _null_ _null_ _null_ oidgt _null_ _null_ _null_ ));
DESCR("greater-than");
DATA(insert OID = 1639 ( oidge PGNSP PGUID 12 1 0 0 f f f t f i 2 0 16 "26 26" _null_ _null_ _null_ _null_ oidge _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index c1b9393..13fd41a 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -948,6 +948,7 @@ extern Datum pg_convert(PG_FUNCTION_ARGS);
extern Datum pg_convert_to(PG_FUNCTION_ARGS);
extern Datum pg_convert_from(PG_FUNCTION_ARGS);
extern Datum length_in_encoding(PG_FUNCTION_ARGS);
+extern Datum pg_encoding_max_length_sql(PG_FUNCTION_ARGS);
/* format_type.c */
extern Datum format_type(PG_FUNCTION_ARGS);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers