I'm testing this... Do you think this will work better? We don't support any
character sets where a space is 32 but it isn't ASCII, or 64 but it isn't EBCDIC, do we?
This should handle those 2 sets, and if it is something else, it bails.
This seems OK? I'm testing it now on x86-64, but I don't have an EBCDIC
target...
Andrew
On 7/26/21 2:39 PM, David Malcolm wrote:
On Mon, 2021-07-26 at 14:21 -0400, Andrew MacLeod via Gcc-patches
wrote:
Remove lower case characters from the range of to_upper and likewise,
upper case characters from to_lower.
I looked at also only adding the upper case characters for which there
is a lower_case character in the range, but it seemed of limited use
given some odd usage patterns we emit. Instead, in
fold_using_range::range_of_builtin_call I simply took the
incoming range, removed the "from" character set, and added the "to"
character set. This'll preserve any odd things that are passed into it
while providing the basic functionality.
Easy enough for someone to enhance if they feel so inclined.
Bootstrapped on x86_64-pc-linux-gnu with no regressions. Pushed.
Awkward question: does this work with character sets where there are
non-letter characters between 'a' and 'z' and between 'A' and 'Z' (e.g.
EBCDIC [1])?
For example toupper('~') should return '~', but '~' is between 'a' and
'z' in EBCDIC; likewise tolower('}') should return '}', but '}' is
between 'A' and 'Z' in EBCDIC.
Dave
[1] https://en.wikipedia.org/wiki/EBCDIC
commit bd885b3c27fae450e1b5a880ccd5dd5bd89722c1
Author: Andrew MacLeod <amacl...@redhat.com>
Date: Mon Jul 26 15:38:42 2021 -0400
Handle ASCII and EBCDIC in toupper and tolower ranges.
PR tree-optimization/78888
* gimple-range-fold.cc (get_letter_range): New.
(fold_using_range::range_of_builtin_call): Call get_letter_range.
diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc
index 8465b4a82f6..fa7adce250d 100644
--- a/gcc/gimple-range-fold.cc
+++ b/gcc/gimple-range-fold.cc
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
+#include "langhooks.h"
#include "vr-values.h"
#include "range.h"
#include "value-query.h"
@@ -835,6 +836,43 @@ fold_using_range::range_of_builtin_ubsan_call (irange &r, gcall *call,
r.set_varying (type);
}
+// Return the range [LO, HI] of target code points, built in TYPE.
+
+static int_range<2>
+letter_span (tree type, int lo, int hi)
+{
+  return int_range<2> (build_int_cst (type, lo), build_int_cst (type, hi));
+}
+
+// Return TRUE if we recognize the target character set and return the
+// range for lower case and upper case letters.
+//
+// TYPE is the type the range bounds are built in.  On success LOWERS
+// and UPPERS are overwritten with the target's lower case and upper
+// case letter ranges; on failure they are left untouched.
+//
+// The bounds are spelled as target code points rather than as host
+// character literals such as 'a': a literal has the host's encoding,
+// which is the wrong value whenever the host and target character
+// sets differ (e.g. an ASCII host building for an EBCDIC target).
+
+static bool
+get_letter_range (tree type, irange &lowers, irange &uppers)
+{
+  // A space is 0x20 in ASCII and 0x40 in EBCDIC; translate the host
+  // space once and use it to tell the two supported sets apart.
+  const auto space = lang_hooks.to_target_charset (' ');
+
+  // ASCII: each case is one contiguous run.
+  if (space == 0x20)
+    {
+      lowers = letter_span (type, 0x61, 0x7a);          // a-z
+      uppers = letter_span (type, 0x41, 0x5a);          // A-Z
+      return true;
+    }
+
+  // EBCDIC: each case is three runs separated by non-letter code
+  // points, which must stay out of the ranges.
+  if (space == 0x40)
+    {
+      lowers = letter_span (type, 0x81, 0x89);          // a-i
+      lowers.union_ (letter_span (type, 0x91, 0x99));   // j-r
+      lowers.union_ (letter_span (type, 0xa2, 0xa9));   // s-z
+      uppers = letter_span (type, 0xc1, 0xc9);          // A-I
+      uppers.union_ (letter_span (type, 0xd1, 0xd9));   // J-R
+      uppers.union_ (letter_span (type, 0xe2, 0xe9));   // S-Z
+      return true;
+    }
+
+  // Unknown character set.
+  return false;
+}
+
+
// For a builtin in CALL, return a range in R if known and return
// TRUE. Otherwise return FALSE.
@@ -873,13 +911,16 @@ fold_using_range::range_of_builtin_call (irange &r, gcall *call,
arg = gimple_call_arg (call, 0);
if (!src.get_operand (r, arg))
return false;
+
+ int_range<5> lowers;
+ int_range<5> uppers;
+ if (!get_letter_range (type, lowers, uppers))
+ return false;
+
// Return the range passed in without any lower case characters,
// but including all the upper case ones.
- int_range<2> exclude (build_int_cst (type, 'a'),
- build_int_cst (type, 'z'), VR_ANTI_RANGE);
- r.intersect (exclude);
- int_range<2> uppers (build_int_cst (type, 'A'),
- build_int_cst (type, 'Z'));
+ lowers.invert ();
+ r.intersect (lowers);
r.union_ (uppers);
return true;
}
@@ -889,13 +930,16 @@ fold_using_range::range_of_builtin_call (irange &r, gcall *call,
arg = gimple_call_arg (call, 0);
if (!src.get_operand (r, arg))
return false;
+
+ int_range<5> lowers;
+ int_range<5> uppers;
+ if (!get_letter_range (type, lowers, uppers))
+ return false;
+
// Return the range passed in without any upper case characters,
// but including all the lower case ones.
- int_range<2> exclude (build_int_cst (type, 'A'),
- build_int_cst (type, 'Z'), VR_ANTI_RANGE);
- r.intersect (exclude);
- int_range<2> lowers (build_int_cst (type, 'a'),
- build_int_cst (type, 'z'));
+ uppers.invert ();
+ r.intersect (uppers);
r.union_ (lowers);
return true;
}