On 9 July 2015 at 15:46, Richard Biener <rguent...@suse.de> wrote:
> On Thu, 9 Jul 2015, Bernhard Reutner-Fischer wrote:
>
>> gcc/ChangeLog
>>
>> 2015-07-09  Bernhard Reutner-Fischer  <al...@gcc.gnu.org>
>>
>>       * builtins.c (fold_builtin_tolower, fold_builtin_toupper): New
>>       static functions.
>>       (fold_builtin_1): Handle BUILT_IN_TOLOWER, BUILT_IN_TOUPPER.
>
> As I read it you fold tolower (X) to (X) >= target_char_set ('A')
> && (X) <= target_char_set ('Z') ? (X) - target_char_set ('A') +
> target_char_set ('a') : (X);
>
> I don't think this can be correct for all locales: a locale need not
> have a lower-case character for every upper-case one, nor do all
> letters that have one need to be in the range of 'A' to 'Z'.
>
> Joseph will surely correct me if I am wrong.
>
> What works would eventually be constant folding.

Thinking about it, this is neither tolower_l nor towlower, so it should
probably not be concerned about locales at all.

thanks,
>
> Richard.
>
>> Signed-off-by: Bernhard Reutner-Fischer <rep.dot....@gmail.com>
>> ---
>>  gcc/builtins.c | 99 
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 99 insertions(+)
>>
>> Using the three testcases attached to PR66741 where the -1.c one is using
>> builtins
>> $ for i in 0 1 2;do gcc -o tolower_strcpy-$i tolower_strcpy-$i.c -Ofast -W 
>> -Wall -Wextra -pedantic -DMAIN -msse4.2;done
>>
>> pristine (trunk@225368):
>> # tolower_strcpy-0
>>
>> real  0m6.068s
>> user  0m3.204s
>> sys   0m2.840s
>> # tolower_strcpy-1
>>
>> real  0m8.097s
>> user  0m5.548s
>> sys   0m2.528s
>> # tolower_strcpy-2
>>
>> real  0m3.568s
>> user  0m0.804s
>> sys   0m2.748s
>>
>> trunk@225368 + fold tolower/toupper below
>>
>> # tolower_strcpy-0
>>
>> real  0m6.055s
>> user  0m3.212s
>> sys   0m2.832s
>> # tolower_strcpy-1
>>
>> real  0m5.383s
>> user  0m2.464s
>> sys   0m2.900s
>> # tolower_strcpy-2
>>
>> real  0m3.605s
>> user  0m0.668s
>> sys   0m2.924s
>>
>> The tolower loop now ends up as
>> .L5:
>>         movsbl  (%rbx), %edx
>>         leal    32(%rdx), %ecx
>>         movl    %edx, %eax
>>         subl    $65, %edx
>>         cmpl    $25, %edx
>>         cmovbe  %ecx, %eax
>>         addq    $1, %rbx
>>         movb    %al, -1(%rbx)
>>         cmpq    %rsi, %rbx
>>         jne     .L5
>>
>> instead of the former call
>>
>> .L5:
>>         movsbl  (%rbx), %edi
>>         addq    $1, %rbx
>>         call    tolower
>>         movb    %al, -1(%rbx)
>>         cmpq    %rbp, %rbx
>>         jne     .L5
>>
>> Would something like the attached be OK for trunk after proper testing?
>> Any advice on the inline questions WRT caching the lang_hooks
>> intermediate results?
>> Hints on further steps towards fixing the PR?
>>
>> I think the next step would be to try to teach graphite to fuse the two
>> loops in tolower_strcpy-0.c. Need to look at graphite..
>> Then see how to classify builtins that could be expanded early and what
>> breaks if doing so. This sounds like a potential disaster, fun.
>> Next, see why the vectorizer (or something else) does not pave the way
>> to use SSE instructions as tolower_strcpy-2.c does.
>>
>> thanks,
>>
>> diff --git a/gcc/builtins.c b/gcc/builtins.c
>> index 5f53342..421c908 100644
>> --- a/gcc/builtins.c
>> +++ b/gcc/builtins.c
>> @@ -204,6 +204,9 @@ static tree fold_builtin_strrchr (location_t, tree, 
>> tree, tree);
>>  static tree fold_builtin_strspn (location_t, tree, tree);
>>  static tree fold_builtin_strcspn (location_t, tree, tree);
>>
>> +static tree fold_builtin_tolower (location_t, tree);
>> +static tree fold_builtin_toupper (location_t, tree);
>> +
>>  static rtx expand_builtin_object_size (tree);
>>  static rtx expand_builtin_memory_chk (tree, rtx, machine_mode,
>>                                     enum built_in_function);
>> @@ -10285,6 +10288,12 @@ fold_builtin_1 (location_t loc, tree fndecl, tree 
>> arg0)
>>      case BUILT_IN_ISDIGIT:
>>        return fold_builtin_isdigit (loc, arg0);
>>
>> +    case BUILT_IN_TOLOWER:
>> +      return fold_builtin_tolower (loc, arg0);
>> +
>> +    case BUILT_IN_TOUPPER:
>> +      return fold_builtin_toupper (loc, arg0);
>> +
>>      CASE_FLT_FN (BUILT_IN_FINITE):
>>      case BUILT_IN_FINITED32:
>>      case BUILT_IN_FINITED64:
>> @@ -11208,6 +11217,96 @@ fold_builtin_strcspn (location_t loc, tree s1, tree 
>> s2)
>>      }
>>  }
>>
>> +
>> +/* Simplify a call to the tolower builtin.  ARG is the argument to the call.
>> +
>> +   Return NULL_TREE if no simplification was possible, otherwise return the
>> +   simplified form of the call as a tree.  */
>> +
>> +static tree
>> +fold_builtin_tolower (location_t loc, tree arg)
>> +{
>> +  if (!validate_arg (arg, INTEGER_TYPE))
>> +    return NULL_TREE;
>> +
>> +  /* Transform tolower(c) -> (unsigned)(c) | 0x20.
>> +
>> +     More specifically:
>> +     unsigned tem = arg - 'A';
>> +     if (tem <= ('Z' - 'A'))
>> +       arg += 'a' - 'A';
>> +     return arg;
>> +   */
>> +  unsigned HOST_WIDE_INT target_A = lang_hooks.to_target_charset ('A');
>> +  unsigned HOST_WIDE_INT target_Z = lang_hooks.to_target_charset ('Z');
>> +  unsigned HOST_WIDE_INT target_a = lang_hooks.to_target_charset ('a');
>> +  if (target_A == 0
>> +      || target_Z == 0
>> +      || target_a == 0)
>> +    return NULL_TREE;
>> +
>> +  arg = fold_convert_loc (loc, unsigned_type_node, arg);
>> +  tree tem = fold_build2 (MINUS_EXPR, unsigned_type_node, arg,
>> +                       build_int_cst (unsigned_type_node, target_A));
>> +  /* ??? x19 and x20 would better live in static storage; Think:
>> +   * static struct static_fold_tolower {uHWI x19, x20; unsigned probe_done};
>> +   */
>> +  unsigned HOST_WIDE_INT x19 = target_Z - target_A;
>> +  unsigned HOST_WIDE_INT x20 = target_a - target_A;
>> +  tem = fold_build2_loc (loc, LE_EXPR, integer_type_node, tem,
>> +                      build_int_cst (unsigned_type_node, x19));
>> +  tem = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, tem,
>> +                      fold_build2 (PLUS_EXPR, unsigned_type_node, arg,
>> +                                   build_int_cst (unsigned_type_node, x20)),
>> +                      arg);
>> +  return fold_convert_loc (loc, integer_type_node, tem);
>> +}
>> +
>> +/* Simplify a call to the toupper builtin.  ARG is the argument to the call.
>> +
>> +   Return NULL_TREE if no simplification was possible, otherwise return the
>> +   simplified form of the call as a tree.  */
>> +
>> +static tree
>> +fold_builtin_toupper (location_t loc, tree arg)
>> +{
>> +  if (!validate_arg (arg, INTEGER_TYPE))
>> +    return NULL_TREE;
>> +
>> +  /* Transform toupper(c) -> (unsigned)(c) ^ 0x20.
>> +
>> +     More specifically:
>> +     unsigned tem = arg - 'a';
>> +     if (tem <= ('z' - 'a'))
>> +       arg -= 'a' - 'A';
>> +     return arg;
>> +   */
>> +  unsigned HOST_WIDE_INT target_A = lang_hooks.to_target_charset ('A');
>> +  unsigned HOST_WIDE_INT target_z = lang_hooks.to_target_charset ('z');
>> +  unsigned HOST_WIDE_INT target_a = lang_hooks.to_target_charset ('a');
>> +  if (target_A == 0
>> +      || target_z == 0
>> +      || target_a == 0)
>> +    return NULL_TREE;
>> +
>> +  arg = fold_convert_loc (loc, unsigned_type_node, arg);
>> +  tree tem = fold_build2 (MINUS_EXPR, unsigned_type_node, arg,
>> +                       build_int_cst (unsigned_type_node, target_a));
>> +  /* ??? x19 and x20 would better live in static storage; Think:
>> +   * static struct static_fold_tolower {uHWI x19, x20; unsigned probe_done};
>> +   */
>> +  unsigned HOST_WIDE_INT x19 = target_z - target_a;
>> +  unsigned HOST_WIDE_INT x20 = target_a - target_A;
>> +  tem = fold_build2_loc (loc, LE_EXPR, integer_type_node, tem,
>> +                      build_int_cst (unsigned_type_node, x19));
>> +  tem = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, tem,
>> +                      fold_build2 (MINUS_EXPR, unsigned_type_node, arg,
>> +                                   build_int_cst (unsigned_type_node, x20)),
>> +                      arg);
>> +  return fold_convert_loc (loc, integer_type_node, tem);
>> +}
>> +
>> +
>>  /* Fold the next_arg or va_start call EXP. Returns true if there was an 
>> error
>>     produced.  False otherwise.  This is done so that we don't output the 
>> error
>>     or warning twice or three times.  */
>>
>
> --
> Richard Biener <rguent...@suse.de>
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham 
> Norton, HRB 21284 (AG Nuernberg)

Reply via email to