Thank you for bringing that to my attention. Is there a way to subscribe
to cf-bot failures?
Apparently I confused myself with my naming. I have attached a patch that
fixes the bug (at least in my cassert test-world run).
Regards
Arne
On 2024-01-22 06:38, Peter Smith wrote:
2024-01 Commitfest.
Hi, This patch has a CF status of "Needs Review" [1], but it seems
like there were CFbot test failures last time it was run [2]. Please
have a look and post an updated version if necessary.
======
[1] https://commitfest.postgresql.org/46/4282/
[2] https://cirrus-ci.com/github/postgresql-cfbot/postgresql/commitfest/46/4282
Kind Regards,
Peter Smith.
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 34a0ec5901..c416be2ea0 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -579,32 +579,13 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
return NULL; /* keep compiler quiet */
}
-/*
- * updateFuzzyAttrMatchState
- * Using Levenshtein distance, consider if column is best fuzzy match.
- */
static void
-updateFuzzyAttrMatchState(int fuzzy_rte_penalty,
- FuzzyAttrMatchState *fuzzystate, RangeTblEntry *rte,
- const char *actual, const char *match, int attnum)
+updateFuzzyAttrMatchStateSingleString(int fuzzy_rte_penalty,
+ FuzzyAttrMatchState *fuzzystate, RangeTblEntry *rte,
+ const char *actual, const char *match, int attnum, int matchlen)
{
- int columndistance;
- int matchlen;
-
- /* Bail before computing the Levenshtein distance if there's no hope. */
- if (fuzzy_rte_penalty > fuzzystate->distance)
- return;
-
- /*
- * Outright reject dropped columns, which can appear here with apparent
- * empty actual names, per remarks within scanRTEForColumn().
- */
- if (actual[0] == '\0')
- return;
-
/* Use Levenshtein to compute match distance. */
- matchlen = strlen(match);
- columndistance =
+ int columndistance =
varstr_levenshtein_less_equal(actual, strlen(actual), match, matchlen,
1, 1, 1,
fuzzystate->distance + 1
@@ -667,6 +648,142 @@ updateFuzzyAttrMatchState(int fuzzy_rte_penalty,
}
}
+/*
+ * putUnderscores
+ *	  Write up to underscore_amount '_' characters into string, beginning at
+ *	  index start.  Writing stops early once index len is reached, so nothing
+ *	  at or beyond string[len] is modified.
+ */
+static void putUnderscores(char* string, int start, int underscore_amount, int len) {
+ for (int i = 0; start + i < len && i < underscore_amount; i++)
+ string[start + i] = '_';
+}
+
+/*
+ * updateFuzzyAttrMatchState
+ *	  Using Levenshtein distance, consider if column is best fuzzy match.
+ *
+ * Besides the match string itself, rearrangements of its underscore-separated
+ * sections are scored as well: every two-section swap around a delimiter and,
+ * for pairs of delimiters of equal width, the three mirroring permutations of
+ * the three sections.  Each rearrangement is scored with fuzzy_rte_penalty
+ * increased by one.  The number of rearrangements tried is capped to avoid
+ * long runtimes.
+ */
+static void
+updateFuzzyAttrMatchState(int fuzzy_rte_penalty,
+						  FuzzyAttrMatchState *fuzzystate, RangeTblEntry *rte,
+						  const char *actual, const char *match, int attnum)
+{
+	/* Memory segment to store the current permutation of the match string. */
+	char	   *tmp_match;
+	int			matchlen = strlen(match);
+	/* We keep track of how many permutations we have already processed, to avoid long runtimes. */
+	int			underscore_permutations_count = 0;
+	/* The location of the underscore we currently process within the match string. */
+	int			underscore_current = 1;
+	/* Variables to track the amount of underscores delimiting sections */
+	int			underscore_amount = 1;
+	int			underscore_second_amount = 1;
+
+	/* Bail before computing the Levenshtein distance if there's no hope. */
+	if (fuzzy_rte_penalty > fuzzystate->distance)
+		return;
+
+	/*
+	 * Outright reject dropped columns, which can appear here with apparent
+	 * empty actual names, per remarks within scanRTEForColumn().
+	 */
+	if (actual[0] == '\0')
+		return;
+
+	/* Always score the unpermuted match string first. */
+	updateFuzzyAttrMatchStateSingleString(fuzzy_rte_penalty, fuzzystate, rte, actual, match, attnum, matchlen);
+	/* We don't want to permute zero length strings, so skip any leading underscores. */
+	if (match[0] == '_') {
+		while (underscore_current < matchlen - 1 && match[underscore_current] == '_') {
+			underscore_current++;
+		}
+	}
+	/* Advance to the next underscore. We do this once here to avoid pallocing, if the string doesn't contain an underscore at all. */
+	while (underscore_current < matchlen - 1 && match[underscore_current] != '_') {
+		underscore_current++;
+	}
+	/*
+	 * Check for permuting up to three sections separated by underscores.
+	 *
+	 * We only get here with underscore_current < matchlen - 1 if the scan
+	 * above stopped on an underscore, i.e. there is at least one delimiter
+	 * that is not the last character of the string.
+	 */
+	if (underscore_current < matchlen - 1) {
+		tmp_match = palloc(matchlen + 1);
+		tmp_match[matchlen] = '\0';
+		while (underscore_permutations_count < 300 && underscore_current < matchlen - 1) {
+			/*
+			 * If sections are separated by more than one underscore, we want to swap the sections around the whole run.
+			 * There would be no point in swapping zero length strings around.
+			 * So we check how many consecutive underscores we have here.
+			 */
+			underscore_amount = 1;
+			while (underscore_current + underscore_amount < matchlen && match[underscore_current + underscore_amount] == '_') {
+				underscore_amount++;
+			}
+			/* Stop if we reached the end of the string. */
+			if (underscore_current + underscore_amount == matchlen) {
+				underscore_current = matchlen;
+				break;
+			}
+			/*
+			 * Consider swapping two sections: "A<delim>Rest" becomes "Rest<delim>A".
+			 *
+			 * Rest occupies the first matchlen - underscore_current - underscore_amount
+			 * bytes, the delimiter follows directly, and A fills the remaining
+			 * underscore_current bytes, so that exactly matchlen bytes are written
+			 * and the terminating NUL stays intact for any delimiter width.
+			 */
+			memcpy(tmp_match, &match[underscore_current + underscore_amount], matchlen - underscore_current - underscore_amount);
+			putUnderscores(tmp_match, matchlen - underscore_current - underscore_amount, underscore_amount, matchlen);
+			memcpy(&tmp_match[matchlen - underscore_current], match, underscore_current);
+			updateFuzzyAttrMatchStateSingleString(fuzzy_rte_penalty + 1, fuzzystate, rte, actual, tmp_match, attnum, matchlen);
+			underscore_permutations_count++;
+			/*
+			 * Consider swapping three sections.
+			 *
+			 * Skip this if we tried too many permutations (underscore_permutations_count) already, to avoid long wait times for the user.
+			 * Otherwise just loop through all places where we can potentially find an underscore delimited section,
+			 * which is delimited by the same amount of underscores.
+			 */
+			for (int underscore_second_current = underscore_current + 1 + underscore_amount;
+				 underscore_permutations_count < 200 && underscore_second_current < matchlen - underscore_amount;
+				 underscore_second_current += underscore_second_amount) {
+				underscore_second_amount = 1;
+				/* Advance to a second underscore delimiter. */
+				if (match[underscore_second_current] != '_')
+					continue;
+				/* Determine how many underscores we have delimiting the potential second section. */
+				while (underscore_second_current < matchlen - underscore_second_amount && match[underscore_second_current + underscore_second_amount] == '_')
+					underscore_second_amount++;
+				/* Advance, if we either reached the end of the string or the amount of underscores does not match. */
+				if (underscore_second_current >= matchlen - underscore_second_amount || underscore_amount != underscore_second_amount)
+					continue;
+				/*
+				 * Only consider mirroring permutations, since the three simple rotations are already
+				 * (or will be for a later underscore_current) covered above.
+				 *
+				 * The entries of the permutation matrix tell us where we should copy the three segments to.
+				 * The zeroth dimension iterates over the permutations, while the first dimension iterates
+				 * over the target offsets of the three segments.
+				 * Considering the string A_B_C the three segments are:
+				 * - before the initial underscore sections (A)
+				 * - between the underscore sections (B)
+				 * - after the later underscore sections (C)
+				 *
+				 * The rows produce B_A_C, C_B_A and A_C_B respectively.
+				 */
+				int permutation_matrix[3][3] = {{underscore_second_current - underscore_current, 0, underscore_second_current + underscore_amount},
+												{matchlen - underscore_current, matchlen - underscore_second_current, 0},
+												{0, matchlen - underscore_second_current + underscore_current + underscore_amount, underscore_current + underscore_amount}};
+				/* We now loop over the mirroring permutations one by one. */
+				for (int k = 0; k < 3; k++) {
+					/*
+					 * Each segment is followed by a delimiter; putUnderscores() does not
+					 * write at or past matchlen, so the segment that ends up last gets none.
+					 */
+					memcpy(&tmp_match[permutation_matrix[k][0]], match, underscore_current); // A
+					putUnderscores(tmp_match, permutation_matrix[k][0] + underscore_current, underscore_amount, matchlen);
+					memcpy(&tmp_match[permutation_matrix[k][1]], &match[underscore_current + underscore_amount], underscore_second_current - underscore_current - underscore_amount); // B
+					putUnderscores(tmp_match, permutation_matrix[k][1] + underscore_second_current - underscore_current - underscore_amount, underscore_amount, matchlen);
+					memcpy(&tmp_match[permutation_matrix[k][2]], &match[underscore_second_current + underscore_amount], matchlen - underscore_second_current - underscore_amount); // C
+					putUnderscores(tmp_match, permutation_matrix[k][2] + matchlen - underscore_second_current - underscore_amount, underscore_amount, matchlen);
+					tmp_match[matchlen] = '\0';
+					updateFuzzyAttrMatchStateSingleString(fuzzy_rte_penalty + 1, fuzzystate, rte, actual, tmp_match, attnum, matchlen);
+				}
+				underscore_permutations_count += 3;
+			}
+			/* Move on to the next underscore delimiter, if any. */
+			underscore_current++;
+			while (underscore_current < matchlen - 1 && match[underscore_current] != '_') {
+				underscore_current++;
+			}
+		}
+		pfree(tmp_match);
+	}
+}
+
/*
* scanNSItemForColumn
* Search the column names of a single namespace item for the given name.
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index 33a6dceb0e..c16e58ffa9 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -968,3 +968,9 @@ explain (costs off) select * from list_parted_tbl;
(2 rows)
drop table list_parted_tbl;
+-- Test hints for underscore swap in attnames without explicitly creating a table.
+select checked_din_5008 from (select false my_col, false checkd_d_500, true din_5008_checked) foo;
+ERROR: column "checked_din_5008" does not exist
+LINE 1: select checked_din_5008 from (select false my_col, false che...
+ ^
+HINT: Perhaps you meant to reference the column "foo.din_5008_checked".
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index 019f1e7673..8dc1846d7d 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -262,3 +262,7 @@ create table list_parted_tbl1 partition of list_parted_tbl
for values in (1) partition by list(b);
explain (costs off) select * from list_parted_tbl;
drop table list_parted_tbl;
+
+-- Test hints for underscore swap in attnames without explicitly creating a table.
+select checked_din_5008 from (select false my_col, false checkd_d_500, true din_5008_checked) foo;
+