Hi, I have attached a rebased patch if someone wants to review in the next CF. No changes as compared to v4.
Regards, Hunaid Sohail >
From 951f3e9620f0d50fcdaff095652a07d6304b490c Mon Sep 17 00:00:00 2001 From: Hunaid Sohail <hunaid2000@gmail.com> Date: Wed, 30 Oct 2024 12:00:47 +0500 Subject: [PATCH v5] Add roman support for to_number function --- doc/src/sgml/func.sgml | 13 ++- src/backend/utils/adt/formatting.c | 154 +++++++++++++++++++++++++- src/test/regress/expected/numeric.out | 41 +++++++ src/test/regress/sql/numeric.sql | 21 ++++ 4 files changed, 224 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 05f630c6a6..5f91f0401b 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -8628,7 +8628,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); </row> <row> <entry><literal>RN</literal></entry> - <entry>Roman numeral (input between 1 and 3999)</entry> + <entry>Roman numeral (valid for numbers 1 to 3999)</entry> </row> <row> <entry><literal>TH</literal> or <literal>th</literal></entry> @@ -8756,6 +8756,17 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); (e.g., <literal>9.99EEEE</literal> is a valid pattern). </para> </listitem> + + <listitem> + <para> + In <function>to_number</function>, <literal>RN</literal> pattern converts + roman numerals to standard numbers. It is case-insensitive (e.g., <literal>'XIV'</literal>, + <literal>'xiv'</literal>, and <literal>'Xiv'</literal> are all seen as <literal>14</literal>). + <literal>RN</literal> cannot be used in combination with any other formatting patterns + or modifiers, with the exception of <literal>FM</literal>, which is applicable only in + <function>to_char()</function> and is ignored in <function>to_number()</function>. + </para> + </listitem> </itemizedlist> </para> diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 85a7dd4561..5e5e763388 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -49,7 +49,6 @@ * - better number building (formatting) / parsing, now it isn't * ideal code * - use Assert() - * - add support for roman number to standard number conversion * - add support for number spelling * - add support for string to string formatting (we must be better * than Oracle :-), @@ -270,6 +269,31 @@ static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "D static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL}; static const char *const numth[] = {"st", "nd", "rd", "th", NULL}; +/* ---------- + * MACRO: Check if the current and next characters + * form a valid subtraction combination for roman numerals + * ---------- + */ +#define IS_VALID_SUB_COMB(curr, next) \ + (((curr) == 'I' && ((next) == 'V' || (next) == 'X')) || \ + ((curr) == 'X' && ((next) == 'L' || (next) == 'C')) || \ + ((curr) == 'C' && ((next) == 'D' || (next) == 'M'))) + +/* ---------- + * MACRO: Roman number value + * ---------- + */ +#define ROMAN_VAL(r) \ + ((r) == 'I' ? 1 : \ + (r) == 'V' ? 5 : \ + (r) == 'X' ? 10 : \ + (r) == 'L' ? 50 : \ + (r) == 'C' ? 100 : \ + (r) == 'D' ? 500 : \ + (r) == 'M' ? 1000 : 0) + +#define MAX_ROMAN_LEN 15 + /* ---------- * Flags & Options: * ---------- @@ -1074,6 +1098,7 @@ static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, static char *fill_str(char *str, int c, int max); static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); static char *int_to_roman(int number); +static int roman_to_int(char* s, int len); static void NUM_prepare_locale(NUMProc *Np); static char *get_last_relevant_decnum(char *num); static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len); @@ -1284,6 +1309,15 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n) case NUM_rn: case NUM_RN: + if (IS_ROMAN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"RN\" twice"))); + if (IS_BLANK(num) || IS_LSIGN(num) || IS_BRACKET(num) || + IS_MINUS(num) || IS_PLUS(num) || IS_MULTI(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"RN\" is incompatible with other formats"))); num->flag |= NUM_F_ROMAN; break; @@ -5247,7 +5281,107 @@ int_to_roman(int number) return result; } +/* Convert a standard roman numeral to an integer. + * Result is an integer between 1 and 3999. + * + * If input is invalid, return -1. + */ +static int +roman_to_int(char* s, int len) +{ + int repeatCount = 1; + int vCount = 0, lCount = 0, dCount = 0; + bool subtractionEncountered = false; + char lastSubtractedChar = 0; + int total = 0; + + /* Ensure the input is not too long or empty. + * 'MMMDCCCLXXXVIII' (3888) is the longest valid roman numeral (15 chars). + */ + if (len == 0 || len > MAX_ROMAN_LEN) + return -1; + + for (int i = 0; i < len; ++i) + { + char currChar = pg_ascii_toupper(s[i]); + int currValue = ROMAN_VAL(currChar); + + if (currValue == 0) + return -1; + + /* Ensure no character greater than or equal to the subtracted + * character appears after the subtraction. + */ + if (subtractionEncountered && (currValue >= ROMAN_VAL(lastSubtractedChar))) + return -1; + + /* Check for invalid repetitions of characters V, L, or D. */ + if (currChar == 'V') vCount++; + if (currChar == 'L') lCount++; + if (currChar == 'D') dCount++; + if (vCount > 1 || lCount > 1 || dCount > 1) + return -1; + + if (i < len - 1) + { + char nextChar = pg_ascii_toupper(s[i + 1]); + int nextValue = ROMAN_VAL(nextChar); + + if (nextValue == 0) + return -1; + + /* If the current value is less than the next value, + * handle subtraction. Verify valid subtractive + * combinations and update the total accordingly. + */ + if (currValue < nextValue) + { + /* Check for invalid repetitions of characters V, L, or D. */ + if (nextChar == 'V') vCount++; + if (nextChar == 'L') lCount++; + if (nextChar == 'D') dCount++; + if (vCount > 1 || lCount > 1 || dCount > 1) + return -1; + + /* For cases where the same character is repeated + * with subtraction (e.g. 'MCCM' or 'DCCCD'). + */ + if (repeatCount > 1) + return -1; + if (!IS_VALID_SUB_COMB(currChar, nextChar)) + return -1; + + /* Skip the next character as it is part of + * the subtractive combination. + */ + i++; + repeatCount = 1; + subtractionEncountered = true; + lastSubtractedChar = currChar; + total += (nextValue - currValue); + } + else + { + /* For same characters, check for repetition. */ + if (currChar == nextChar) + { + repeatCount++; + if (repeatCount > 3) + return -1; + } + else + repeatCount = 1; + total += currValue; + } + } + /* Add the value of the last character. */ + else + total += currValue; + } + + return total; +} /* ---------- * Locale @@ -5829,9 +5963,21 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, if (IS_ROMAN(Np->Num)) { if (!Np->is_to_char) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("\"RN\" not supported for input"))); + { + int roman_result; + + roman_result = roman_to_int(inout, input_len); + if (roman_result == -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid roman numeral"))); + else + { + Np->Num->pre = sprintf(number, "%d", roman_result); + Np->Num->post = 0; + return number; + } + } Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post = Np->Num->pre = Np->out_pre_spaces = Np->sign = 0; diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index 0898107ec3..f141f094e6 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -2384,6 +2384,47 @@ SELECT to_number('123456', '99999V99'); 1234.560000000000000000 (1 row) +-- Test for correct conversion between numbers and Roman numerals +WITH rows AS + (SELECT i, to_char(i, 'FMRN') AS roman FROM generate_series(1, 3999) AS i) +SELECT + bool_and(to_number(roman, 'RN') = i) as valid +FROM rows; + valid +------- + t +(1 row) + +SELECT to_number('CvIiI', 'rn'); + to_number +----------- + 108 +(1 row) + +SELECT to_number('viv', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('DCCCD', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('XIXL', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('MCCM', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('MMMM', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('CM', 'MIRN'); +ERROR: "RN" is incompatible with other formats +SELECT to_number('CM', 'RNRN'); +ERROR: cannot use "RN" twice +SELECT to_number(' XIV ', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('MMXX ', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('M CC', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('', 'RN'); +ERROR: invalid roman numeral +SELECT to_number(' ', 'RN'); +ERROR: invalid roman numeral RESET lc_numeric; -- -- Input syntax diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index 9da12c6b9e..1543c52233 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -1085,6 +1085,27 @@ SELECT to_number('1234.56','L99,999.99'); SELECT to_number('1,234.56','L99,999.99'); SELECT to_number('42nd', '99th'); SELECT to_number('123456', '99999V99'); + +-- Test for correct conversion between numbers and Roman numerals +WITH rows AS + (SELECT i, to_char(i, 'FMRN') AS roman FROM generate_series(1, 3999) AS i) +SELECT + bool_and(to_number(roman, 'RN') = i) as valid +FROM rows; + +SELECT to_number('CvIiI', 'rn'); +SELECT to_number('viv', 'RN'); +SELECT to_number('DCCCD', 'RN'); +SELECT to_number('XIXL', 'RN'); +SELECT to_number('MCCM', 'RN'); +SELECT to_number('MMMM', 'RN'); +SELECT to_number('CM', 'MIRN'); +SELECT to_number('CM', 'RNRN'); +SELECT to_number(' XIV ', 'RN'); +SELECT to_number('MMXX ', 'RN'); +SELECT to_number('M CC', 'RN'); +SELECT to_number('', 'RN'); +SELECT to_number(' ', 'RN'); RESET lc_numeric; -- -- 2.34.1