Hi
2016-01-04 12:46 GMT+01:00 Shulgin, Oleksandr <[email protected]>
:
> On Wed, Dec 30, 2015 at 8:28 PM, Pavel Stehule <[email protected]>
> wrote:
>
>>
>>
>> 2015-12-30 17:33 GMT+01:00 Robert Haas <[email protected]>:
>>
>>> On Mon, Dec 28, 2015 at 8:45 AM, Shulgin, Oleksandr
>>> <[email protected]> wrote:
>>> > I didn't check out earlier versions of this patch, but the latest one
>>> still
>>> > changes pg_size_pretty() to emit PB suffix.
>>> >
>>> > I don't think it is worth it to throw a number of changes together like
>>> > that. We should focus on adding pg_size_bytes() first and make it
>>> > compatible with both pg_size_pretty() and existing GUC units: that is
>>> > support suffixes up to TB and make sure they have the meaning of
>>> powers of
>>> > 2^10, not 10^3. Re-using the table present in guc.c would be a plus.
>>> >
>>> > Next, we could think about adding handling of PB suffix on input and
>>> output,
>>> > but I don't see a big problem if that is emitted as 1024TB or the user
>>> has
>>> > to specify it as 1024TB in a GUC or argument to pg_size_bytes(): a
>>> minor
>>> > inconvenience only.
>>>
>>> +1 to everything in this email.
>>>
>>
>> so I removed support for PB and SI units. Now the
>> memory_unit_conversion_table is shared.
>>
>
> Looks better, thanks.
>
> I'm not sure why the need to touch the regression test for
> pg_size_pretty():
>
> ! 10.5 | 10.5 bytes | -10.5 bytes
> ! 1000.5 | 1000.5 bytes | -1000.5 bytes
> ! 1000000.5 | 977 kB | -977 kB
> ! 1000000000.5 | 954 MB | -954 MB
> ! 1000000000000.5 | 931 GB | -931 GB
> ! 1000000000000000.5 | 909 TB | -909 TB
>
>
fixed
> A nitpick, this loop:
>
> + while (*cp)
> + {
> + if ((isdigit(*cp) || *cp == '.') && ndigits < MAX_DIGITS)
> + digits[ndigits++] = *cp++;
> + else
> + break;
> + }
>
> would be a bit easier to parse if spelled as:
>
> + while (*cp && (isdigit(*cp) || *cp == '.') && ndigits < MAX_DIGITS)
> + digits[ndigits++] = *cp++;
>
fixed
>
> On the other hand, this seems to truncate the digits silently:
>
> + digits[ndigits] = '\0';
>
> I don't think we want that, e.g:
>
> postgres=# select pg_size_bytes('9223372036854775807.9');
> ERROR: invalid unit "9"
> HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB".
>
> I think making a mutable copy of the input string and truncating it before
> passing to numeric_in() would make more sense--no need to hard-code
> MAX_DIGITS. The same goes for hard-coding MAX_UNIT_LEN, e.g. compare the
> following two outputs:
>
> postgres=# select pg_size_bytes('1 KiB');
> ERROR: invalid unit "KiB"
> HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB".
>
> postgres=# select pg_size_bytes('1024 bytes');
> ERROR: invalid format
>
>
fixed
> I believe we should see a similar error message and a hint in the latter
> case. (No, I don't think we should add support for 'bytes' as a unit, not
> even for "compatibility" with pg_size_pretty()--for one, I don't think it
> would be wise to expect pg_size_bytes() to be able to deparse *every*
> possible output produced by pg_size_pretty() as its purpose is
> human-readable display; also, pg_size_pretty() can easily produce output
> that doesn't fit into bigint type, or is just negative)
>
> Code comments and doc change need proof-reading by a native English
> speaker, which I am not.
>
Regards
Pavel
>
> --
> Alex
>
>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
new file mode 100644
index 8ef9fce..6b921ae
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
*************** postgres=# SELECT * FROM pg_xlogfile_nam
*** 17607,17612 ****
--- 17607,17615 ----
<primary>pg_relation_size</primary>
</indexterm>
<indexterm>
+ <primary>pg_size_bytes</primary>
+ </indexterm>
+ <indexterm>
<primary>pg_size_pretty</primary>
</indexterm>
<indexterm>
*************** postgres=# SELECT * FROM pg_xlogfile_nam
*** 17677,17682 ****
--- 17680,17696 ----
</entry>
</row>
<row>
+ <entry>
+ <literal><function>pg_size_bytes(<type>text</type>)</function></literal>
+ </entry>
+ <entry><type>bigint</type></entry>
+ <entry>
+ Converts a size in human-readable format with size units
+ into bytes. The parameter is a case-insensitive string. The following
+ units are supported: kB, MB, GB, TB.
+ </entry>
+ </row>
+ <row>
<entry>
<literal><function>pg_size_pretty(<type>bigint</type>)</function></literal>
</entry>
diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
new file mode 100644
index 2084692..ce97467
*** a/src/backend/utils/adt/dbsize.c
--- b/src/backend/utils/adt/dbsize.c
***************
*** 25,30 ****
--- 25,31 ----
#include "storage/fd.h"
#include "utils/acl.h"
#include "utils/builtins.h"
+ #include "utils/guc.h"
#include "utils/numeric.h"
#include "utils/rel.h"
#include "utils/relfilenodemap.h"
*************** pg_size_pretty_numeric(PG_FUNCTION_ARGS)
*** 700,705 ****
--- 701,811 ----
}
/*
+ * Convert a human-readable size string (e.g. "1.5 GB") to a number of bytes.
+ *
+ * Accepted input: optional whitespace, optional '+', digits and decimal
+ * points, optional whitespace, optional unit known to parse_memory_unit().
+ * A leading '-', a missing number or an unknown unit is reported as an error.
+ *
+ * parse_int() cannot be used here because the input may contain a decimal
+ * fraction and the unit names are matched case-insensitively.
+ */
+ Datum
+ pg_size_bytes(PG_FUNCTION_ARGS)
+ {
+ 	text	   *arg = PG_GETARG_TEXT_PP(0);
+ 	char	   *str = text_to_cstring(arg);	/* not const: pfree'd below */
+ 	const char *strptr = str;
+ 	char	   *buffer;
+ 	char	   *bufptr;
+ 	Numeric		num;
+ 	int64		result;
+
+ 	/* working buffer cannot be longer than original string */
+ 	buffer = (char *) palloc(VARSIZE_ANY_EXHDR(arg) + 1);
+ 	bufptr = buffer;
+
+ 	/* skip leading whitespace */
+ 	while (isspace((unsigned char) *strptr))
+ 		strptr++;
+
+ 	/* an explicit plus sign is skipped; negative sizes are rejected */
+ 	if (*strptr == '+')
+ 		strptr++;
+ 	else if (*strptr == '-')
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("size cannot be negative")));
+
+ 	/* copy the numeric part (digits and decimal points) to the buffer */
+ 	while (*strptr && (isdigit((unsigned char) *strptr) || *strptr == '.'))
+ 		*bufptr++ = *strptr++;
+ 	*bufptr = '\0';
+
+ 	/* a number is mandatory */
+ 	if (*buffer == '\0')
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("\"%s\" is not number", str)));
+
+ 	num = DatumGetNumeric(DirectFunctionCall3(numeric_in,
+ 		CStringGetDatum(buffer), ObjectIdGetDatum(0), Int32GetDatum(-1)));
+
+ 	/* allow whitespace between number and unit */
+ 	while (isspace((unsigned char) *strptr))
+ 		strptr++;
+
+ 	/* Handle possible unit */
+ 	if (*strptr != '\0')
+ 	{
+ 		int			multiplier;
+ 		Numeric		mul_num;
+ 		const char *hintmsg;
+ 		const char *unitstr = strptr;
+
+ 		bufptr = buffer;
+
+ 		/* copy the unit word to the buffer, stopping at whitespace */
+ 		while (*strptr && !isspace((unsigned char) *strptr))
+ 			*bufptr++ = *strptr++;
+ 		*bufptr = '\0';
+
+ 		/*
+ 		 * If only whitespace follows the unit word, look up that word alone;
+ 		 * otherwise pass the whole remaining text, so the error shows it all.
+ 		 */
+ 		while (isspace((unsigned char) *strptr))
+ 			strptr++;
+ 		if (*strptr == '\0')
+ 			unitstr = buffer;
+
+ 		if (!parse_memory_unit(unitstr, &multiplier, &hintmsg))
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 					 errmsg("invalid unit: \"%s\"", unitstr),
+ 					 errhint("%s", _(hintmsg))));
+
+ 		/*
+ 		 * The multiplier is expressed in kB, so scale it to bytes.  Do the
+ 		 * arithmetic in int64: "int" or a 32-bit "long" overflows for 1 TB.
+ 		 */
+ 		mul_num = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
+ 							Int64GetDatum((int64) multiplier * 1024)));
+
+ 		num = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
+ 												  NumericGetDatum(mul_num),
+ 												  NumericGetDatum(num)));
+ 	}
+
+ 	result = DatumGetInt64(DirectFunctionCall1(numeric_int8, NumericGetDatum(num)));
+
+ 	pfree(buffer);
+ 	pfree(str);
+
+ 	PG_RETURN_INT64(result);
+ }
+
+ /*
* Get the filenode of a relation
*
* This is expected to be used in queries like
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
new file mode 100644
index 38ba82f..00021fd
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
*************** convert_from_base_unit(int64 base_value,
*** 5238,5243 ****
--- 5238,5272 ----
/*
+ * Look up "unit" (case-insensitively, unlike convert_to_base_unit) among the
+ * kB-based memory units.  On a match store its multiplier, expressed in kB,
+ * and return true; else point *hintmsg at the units hint and return false.
+ */
+ bool
+ parse_memory_unit(const char *unit, int *multiplier,
+ 				  const char **hintmsg)
+ {
+ 	int			i;
+
+ 	for (i = 0; *memory_unit_conversion_table[i].unit; i++)
+ 	{
+ 		const unit_conversion *conv = &memory_unit_conversion_table[i];
+
+ 		if (conv->base_unit == GUC_UNIT_KB &&
+ 			pg_strcasecmp(unit, conv->unit) == 0)	/* ASCII-only folding */
+ 		{
+ 			*multiplier = conv->multiplier;
+ 			return true;
+ 		}
+ 	}
+
+ 	*hintmsg = memory_units_hint;	/* written on failure only */
+
+ 	return false;
+ }
+
+
+ /*
* Try to parse value as an integer. The accepted formats are the
* usual decimal, octal, or hexadecimal formats, optionally followed by
* a unit name if "flags" indicates a unit is allowed.
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
new file mode 100644
index e5d6c77..4b73e72
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
*************** DATA(insert OID = 2286 ( pg_total_relati
*** 3662,3667 ****
--- 3662,3669 ----
DESCR("total disk space usage for the specified table and associated indexes");
DATA(insert OID = 2288 ( pg_size_pretty PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 25 "20" _null_ _null_ _null_ _null_ _null_ pg_size_pretty _null_ _null_ _null_ ));
DESCR("convert a long int to a human readable text using size units");
+ DATA(insert OID = 3317 ( pg_size_bytes PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 20 "25" _null_ _null_ _null_ _null_ _null_ pg_size_bytes _null_ _null_ _null_ ));
+ DESCR("convert a human readable text with size units to long int bytes");
DATA(insert OID = 3166 ( pg_size_pretty PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 25 "1700" _null_ _null_ _null_ _null_ _null_ pg_size_pretty_numeric _null_ _null_ _null_ ));
DESCR("convert a numeric to a human readable text using size units");
DATA(insert OID = 2997 ( pg_table_size PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 20 "2205" _null_ _null_ _null_ _null_ _null_ pg_table_size _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
new file mode 100644
index bbaa2ce..5b08a6a
*** a/src/include/utils/builtins.h
--- b/src/include/utils/builtins.h
*************** extern Datum pg_relation_size(PG_FUNCTIO
*** 462,467 ****
--- 462,468 ----
extern Datum pg_total_relation_size(PG_FUNCTION_ARGS);
extern Datum pg_size_pretty(PG_FUNCTION_ARGS);
extern Datum pg_size_pretty_numeric(PG_FUNCTION_ARGS);
+ extern Datum pg_size_bytes(PG_FUNCTION_ARGS);
extern Datum pg_table_size(PG_FUNCTION_ARGS);
extern Datum pg_indexes_size(PG_FUNCTION_ARGS);
extern Datum pg_relation_filenode(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
new file mode 100644
index e1de1a5..3bfe0f4
*** a/src/include/utils/guc.h
--- b/src/include/utils/guc.h
*************** extern int NewGUCNestLevel(void);
*** 357,362 ****
--- 357,364 ----
extern void AtEOXact_GUC(bool isCommit, int nestLevel);
extern void BeginReportingGUCOptions(void);
extern void ParseLongOption(const char *string, char **name, char **value);
+ extern bool parse_memory_unit(const char *unit, int *multiplier,
+ const char **hintmsg);
extern bool parse_int(const char *value, int *result, int flags,
const char **hintmsg);
extern bool parse_real(const char *value, double *result);
diff --git a/src/test/regress/expected/dbsize.out b/src/test/regress/expected/dbsize.out
new file mode 100644
index aa513e7..89a2308
*** a/src/test/regress/expected/dbsize.out
--- b/src/test/regress/expected/dbsize.out
*************** SELECT size, pg_size_pretty(size), pg_si
*** 35,37 ****
--- 35,77 ----
1000000000000000.5 | 909 TB | -909 TB
(12 rows)
+ SELECT pg_size_bytes(size) FROM
+ (VALUES('1'), ('1kB'), ('1MB'), (' 1 GB'), ('1.5 GB '),
+ ('1TB'), ('3000 TB')) x(size);
+ pg_size_bytes
+ ------------------
+ 1
+ 1024
+ 1048576
+ 1073741824
+ 1610612736
+ 1099511627776
+ 3298534883328000
+ (7 rows)
+
+ -- case insensitive units are supported
+ SELECT pg_size_bytes(size) FROM
+ (VALUES('1'), ('1kb'), ('1mb'), (' 1 Gb'), ('1.5 gB '),
+ ('1tb')) x(size);
+ pg_size_bytes
+ ---------------
+ 1
+ 1024
+ 1048576
+ 1073741824
+ 1610612736
+ 1099511627776
+ (6 rows)
+
+ --should fail
+ SELECT pg_size_bytes('1 AB');
+ ERROR: invalid unit: "AB"
+ HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB".
+ SELECT pg_size_bytes('1 AB A');
+ ERROR: invalid unit: "AB A"
+ HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB".
+ select pg_size_bytes('9223372036854775807.9');
+ ERROR: bigint out of range
+ select pg_size_bytes('1024 bytes');
+ ERROR: invalid unit: "bytes"
+ HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB".
diff --git a/src/test/regress/sql/dbsize.sql b/src/test/regress/sql/dbsize.sql
new file mode 100644
index c118090..5caab78
*** a/src/test/regress/sql/dbsize.sql
--- b/src/test/regress/sql/dbsize.sql
*************** SELECT size, pg_size_pretty(size), pg_si
*** 10,12 ****
--- 10,27 ----
(10.5::numeric), (1000.5::numeric), (1000000.5::numeric),
(1000000000.5::numeric), (1000000000000.5::numeric),
(1000000000000000.5::numeric)) x(size);
+
+ SELECT pg_size_bytes(size) FROM
+ (VALUES('1'), ('1kB'), ('1MB'), (' 1 GB'), ('1.5 GB '),
+ ('1TB'), ('3000 TB')) x(size);
+
+ -- case insensitive units are supported
+ SELECT pg_size_bytes(size) FROM
+ (VALUES('1'), ('1kb'), ('1mb'), (' 1 Gb'), ('1.5 gB '),
+ ('1tb')) x(size);
+
+ --should fail
+ SELECT pg_size_bytes('1 AB');
+ SELECT pg_size_bytes('1 AB A');
+ select pg_size_bytes('9223372036854775807.9');
+ select pg_size_bytes('1024 bytes');
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers