[PATCH v4 6/6] libstdc++: make std::is_object dispatch to new built-in traits

2023-06-10 Thread Ken Matsui via Gcc-patches
This patch gets std::is_object to dispatch to new built-in traits,
__is_function, __is_reference, and __is_void.

libstdc++-v3/ChangeLog:
* include/std/type_traits (is_object): Use new built-in traits,
__is_function, __is_reference, and __is_void.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 9 +
 1 file changed, 9 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 412bfb35979..26968e31261 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -682,11 +682,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_object
+#if __has_builtin(__is_function) && __has_builtin(__is_reference) \
+&& __has_builtin(__is_void)
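+  template<typename _Tp>
+struct is_object
+: public __bool_constant<!(__is_function(_Tp) || __is_reference(_Tp)
+                           || __is_void(_Tp))>
+{ };
+#else
   template<typename _Tp>
 struct is_object
 : public __not_<__or_<is_function<_Tp>, is_reference<_Tp>,
   is_void<_Tp>>>::type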
 { };
+#endif
 
   template
 struct is_member_pointer;
-- 
2.41.0



[PATCH v4 4/6] libstdc++: use new built-in trait __is_function for std::is_function

2023-06-10 Thread Ken Matsui via Gcc-patches
This patch gets std::is_function to dispatch to new built-in trait
__is_function.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_function): Use __is_function built-in
trait.
(is_function_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index b2eb4bd3e7f..c0fa748da2e 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -594,6 +594,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_function
+#if __has_builtin(__is_function)
+  template
+struct is_function
+: public __bool_constant<__is_function(_Tp)>
+{ };
+#else
   template
 struct is_function
 : public __bool_constant::value> { };
@@ -605,6 +611,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct is_function<_Tp&&>
 : public false_type { };
+#endif
 
 #define __cpp_lib_is_null_pointer 201309L
 
@@ -3198,7 +3205,7 @@ template 
 template 
   inline constexpr bool is_class_v = __is_class(_Tp);
 template 
-  inline constexpr bool is_function_v = is_function<_Tp>::value;
+  inline constexpr bool is_function_v = __is_function(_Tp);
 template 
   inline constexpr bool is_reference_v = __is_reference(_Tp);
 template 
-- 
2.41.0



[PATCH v4 5/6] c++, libstdc++: implement __is_void built-in trait

2023-06-10 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_void. Since the new built-in
name is __is_void, to avoid unintentional macro replacement, this patch also
involves the removal of the existing __is_void in helper_functions.h and
cpp_type_traits.h and renaming __is_void to is_void in the test file,
pr46567.C.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_void.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_VOID.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/tm/pr46567.C (__is_void): Rename to ...
(is_void): ... this.
* g++.dg/ext/has-builtin-1.C: Test existence of __is_void.
* g++.dg/ext/is_void.C: New test.

libstdc++-v3/ChangeLog:

* include/debug/helper_functions.h (_DiffTraits): Stop using
__is_void.
* include/bits/cpp_type_traits.h (__is_void): Remove unused __is_void.
* include/std/type_traits (is_void_v): Use __is_void built-in
trait.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc  |  3 ++
 gcc/cp/cp-trait.def   |  1 +
 gcc/cp/semantics.cc   |  4 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  3 ++
 gcc/testsuite/g++.dg/ext/is_void.C| 35 +++
 gcc/testsuite/g++.dg/tm/pr46567.C |  6 ++--
 libstdc++-v3/include/bits/cpp_type_traits.h   | 15 
 libstdc++-v3/include/debug/helper_functions.h |  5 ++-
 libstdc++-v3/include/std/type_traits  |  2 +-
 9 files changed, 52 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_void.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 927605c6cb7..e8cd98eb2c7 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3757,6 +3757,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_FUNCTION:
   inform (loc, "  %qT is not a function", t1);
   break;
+case CPTK_IS_VOID:
+  inform (loc, "  %qT is not a void type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 3cd3babc242..8e76668f6ed 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -84,6 +84,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, 
"__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
 DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
+DEFTRAIT_EXPR (IS_VOID, "__is_void", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index b976633645a..c4d44413dce 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12075,6 +12075,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_FUNCTION:
   return type_code1 == FUNCTION_TYPE;
 
+case CPTK_IS_VOID:
+  return VOID_TYPE_P (type1);
+
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
@@ -12297,6 +12300,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_SAME:
 case CPTK_IS_REFERENCE:
 case CPTK_IS_FUNCTION:
+case CPTK_IS_VOID:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index 90eb00ebf2d..b96cc9e6f50 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -152,3 +152,6 @@
 #if !__has_builtin (__is_function)
 # error "__has_builtin (__is_function) failed"
 #endif
+#if !__has_builtin (__is_void)
+# error "__has_builtin (__is_void) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_void.C 
b/gcc/testsuite/g++.dg/ext/is_void.C
new file mode 100644
index 000..707f0d6875b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_void.C
@@ -0,0 +1,35 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+SA_TEST_CATEGORY(__is_void, void, true);
+
+SA_TEST_CATEGORY(__is_void, char, false);
+SA_TEST_CATEGORY(__is_void, signed char, false);
+SA_TEST_CATEGORY(__is_void, unsigned char, false);
+SA_TEST_CATEGORY(__is_void, wchar_t, false);
+SA_TEST_CATEGORY(__is_void, short, false);
+SA_TEST_CATEGORY(__is_void, unsigned short, false);

[PATCH v4 3/6] c++: implement __is_function built-in trait

2023-06-10 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_function.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_function.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_FUNCTION.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_function.
* g++.dg/ext/is_function.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 ++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_function.C   | 58 
 5 files changed, 69 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index f6951ee2670..927605c6cb7 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3754,6 +3754,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_FUNCTION:
+  inform (loc, "  %qT is not a function", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 1e3310cd682..3cd3babc242 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -83,6 +83,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 2f37bc353a1..b976633645a 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12072,6 +12072,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_ENUM:
   return type_code1 == ENUMERAL_TYPE;
 
+case CPTK_IS_FUNCTION:
+  return type_code1 == FUNCTION_TYPE;
+
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
@@ -12293,6 +12296,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
 case CPTK_IS_REFERENCE:
+case CPTK_IS_FUNCTION:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index b697673790c..90eb00ebf2d 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -149,3 +149,6 @@
 #if !__has_builtin (__is_reference)
 # error "__has_builtin (__is_reference) failed"
 #endif
+#if !__has_builtin (__is_function)
+# error "__has_builtin (__is_function) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_function.C 
b/gcc/testsuite/g++.dg/ext/is_function.C
new file mode 100644
index 000..2e1594b12ad
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_function.C
@@ -0,0 +1,58 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+struct A
+{ void fn(); };
+
+template
+struct AHolder { };
+
+template
+struct AHolder
+{ using type = U; };
+
+// Positive tests.
+SA(__is_function(int (int)));
+SA(__is_function(ClassType (ClassType)));
+SA(__is_function(float (int, float, int[], int&)));
+SA(__is_function(int (int, ...)));
+SA(__is_function(bool (ClassType) const));
+SA(__is_function(AHolder::type));
+
+void fn();
+SA(__is_function(decltype(fn)));
+
+// Negative tests.
+SA_TEST_CATEGORY(__is_function, int, false);
+SA_TEST_CATEGORY(__is_function, int*, false);
+SA_TEST_CATEGORY(__is_function, int&, false);
+SA_TEST_CATEGORY(__is_function, void, false);
+SA_TEST_CATEGORY(__is_function, void*, false);
+SA_TEST_CATEGORY(__is_function, void**, false);
+SA_TEST_CATEGORY(__is_function, std::nullptr_t, false);
+
+SA_TEST_CATEGORY(__is_function, AbstractClass, false);
+SA(!__is_function(int(&)(int)));
+SA(!__is_function(int(*)(int)));
+
+SA_TEST_CATEGORY(__is_function, A, false);
+SA_TEST_CATEGORY(__is_function, decltype(::fn), false);
+
+struct FnCallOverload
+{ void operator()(); };
+SA_TEST_CATEGORY(__is_function, FnCallOverload, false);
+
+// Sanity check.

[PATCH v4 2/6] libstdc++: use new built-in trait __is_reference for std::is_reference

2023-06-10 Thread Ken Matsui via Gcc-patches
This patch gets std::is_reference to dispatch to new built-in trait
__is_reference.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_reference): Use __is_reference built-in
trait.
(is_reference_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..b2eb4bd3e7f 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -639,6 +639,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Composite type categories.
 
   /// is_reference
+#if __has_builtin(__is_reference)
+  template
+struct is_reference
+: public __bool_constant<__is_reference(_Tp)>
+{ };
+#else
   template
 struct is_reference
 : public false_type
@@ -653,6 +659,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct is_reference<_Tp&&>
 : public true_type
 { };
+#endif
 
   /// is_arithmetic
   template
@@ -3193,11 +3200,7 @@ template 
 template 
   inline constexpr bool is_function_v = is_function<_Tp>::value;
 template 
-  inline constexpr bool is_reference_v = false;
-template 
-  inline constexpr bool is_reference_v<_Tp&> = true;
-template 
-  inline constexpr bool is_reference_v<_Tp&&> = true;
+  inline constexpr bool is_reference_v = __is_reference(_Tp);
 template 
   inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
 template 
-- 
2.41.0



[PATCH v4 1/6] c++: implement __is_reference built-in trait

2023-06-10 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_reference.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_reference.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_REFERENCE.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_reference.
* g++.dg/ext/is_reference.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_reference.C  | 34 
 5 files changed, 45 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 8cf0f2d0974..f6951ee2670 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3705,6 +3705,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_HAS_VIRTUAL_DESTRUCTOR:
   inform (loc, "  %qT does not have a virtual destructor", t1);
   break;
+case CPTK_IS_REFERENCE:
+  inform (loc, "  %qT is not a reference", t1);
+  break;
 case CPTK_IS_ABSTRACT:
   inform (loc, "  %qT is not an abstract class", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..1e3310cd682 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -67,6 +67,7 @@ DEFTRAIT_EXPR (IS_CONVERTIBLE, "__is_convertible", 2)
 DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1)
 DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1)
 DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1)
+DEFTRAIT_EXPR (IS_REFERENCE, "__is_reference", 1)
 DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2)
 DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1)
 DEFTRAIT_EXPR (IS_NOTHROW_ASSIGNABLE, "__is_nothrow_assignable", 2)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index a2e74a5d2c7..2f37bc353a1 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12075,6 +12075,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
+case CPTK_IS_REFERENCE:
+  return type_code1 == REFERENCE_TYPE;
+
 case CPTK_IS_LAYOUT_COMPATIBLE:
   return layout_compatible_type_p (type1, type2);
 
@@ -12289,6 +12292,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_REFERENCE:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..b697673790c 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_reference)
+# error "__has_builtin (__is_reference) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_reference.C 
b/gcc/testsuite/g++.dg/ext/is_reference.C
new file mode 100644
index 000..b5ce4db7afd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_reference.C
@@ -0,0 +1,34 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+// Positive tests.
+SA_TEST_CATEGORY(__is_reference, int&, true);
+SA_TEST_CATEGORY(__is_reference, ClassType&, true);
+SA(__is_reference(int(&)(int)));
+SA_TEST_CATEGORY(__is_reference, int&&, true);
+SA_TEST_CATEGORY(__is_reference, ClassType&&, true);
+SA(__is_reference(int(&&)(int)));
+SA_TEST_CATEGORY(__is_reference, IncompleteClass&, true);
+
+// Negative tests
+SA_TEST_CATEGORY(__is_reference, void, false);
+SA_TEST_CATEGORY(__is_reference, int*, false);
+SA_TEST_CATEGORY(__is_reference, int[3], false);
+SA(!__is_reference(int(int)));
+SA(!__is_reference(int(*const)(int)));
+SA(!__is_reference(int(*volatile)(int)));
+SA(!__is_reference(int(*const volatile)(int)));
+
+// Sanity check.
+SA_TEST_CATEGORY(__is_reference, ClassType, false);
+SA_TEST_CATEGORY(__is_reference, IncompleteClass, false);
-- 
2.41.0



[PATCH v4 0/6] c++, libstdc++: get std::is_object to dispatch to new built-in traits

2023-06-10 Thread Ken Matsui via Gcc-patches
Hi,

This patch series gets std::is_object to dispatch to built-in traits and
implements the following built-in traits, on which std::is_object depends.

* __is_reference
* __is_function
* __is_void

std::is_object was depending on them with disjunction and negation.

__not_<__or_<is_function<_Tp>, is_reference<_Tp>, is_void<_Tp>>>::type

Therefore, this patch uses them directly instead of implementing an additional
built-in trait __is_object, which makes the compiler slightly bigger and
slower.

__bool_constant<!(__is_function(_Tp) || __is_reference(_Tp) || __is_void(_Tp))>

This would instantiate only __bool_constant<true> and __bool_constant<false>,
which can be mostly shared. That is, the purpose of built-in traits is
considered to be achieved.

Changes in v4

* Used built-in traits for the corresponding predicate-like type traits.

Ken Matsui (6):
  c++: implement __is_reference built-in trait
  libstdc++: use new built-in trait __is_reference for std::is_reference
  c++: implement __is_function built-in trait
  libstdc++: use new built-in trait __is_function for std::is_function
  c++, libstdc++: implement __is_void built-in trait
  libstdc++: make std::is_object dispatch to new built-in traits

 gcc/cp/constraint.cc  |  9 +++
 gcc/cp/cp-trait.def   |  3 +
 gcc/cp/semantics.cc   | 12 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  9 +++
 gcc/testsuite/g++.dg/ext/is_function.C| 58 +++
 gcc/testsuite/g++.dg/ext/is_reference.C   | 34 +++
 gcc/testsuite/g++.dg/ext/is_void.C| 35 +++
 gcc/testsuite/g++.dg/tm/pr46567.C |  6 +-
 libstdc++-v3/include/bits/cpp_type_traits.h   | 15 -
 libstdc++-v3/include/debug/helper_functions.h |  5 +-
 libstdc++-v3/include/std/type_traits  | 33 ---
 11 files changed, 191 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_void.C

-- 
2.41.0



Re: [PATCH] rs6000: Don't use TFmode for 128 bits fp constant in toc [PR110011]

2023-06-10 Thread David Edelsohn via Gcc-patches
On Tue, Jun 6, 2023 at 5:20 AM Kewen.Lin  wrote:

> Hi,
>
> As PR110011 shows, when encoding a 128-bit fp constant into
> the toc, we use REAL_VALUE_TO_TARGET_LONG_DOUBLE, which
> finds the first float mode with LONG_DOUBLE_TYPE_SIZE
> bits of precision; that would be TFmode here.  But the 128-bit
> fp constant can have mode IFmode or KFmode, which
> doesn't necessarily have the same underlying float format
> as TFmode.  As this PR exposes, with option
> -mabi=ibmlongdouble TFmode has ibm_extended_format while
> KFmode has ieee_quad_format, and mixing up the formats (the
> encoding/decoding ways) would cause unexpected results.
>
> This patch is to make it use constant's own mode instead
> of TFmode for real_to_target call.
>
> Bootstrapped and regtested on powerpc64-linux-gnu P7/P8/P9 and
> powerpc64le-linux-gnu P9 and P10.
>
> I'll push this next week if no objections.
>
> BR,
> Kewen
> -
> PR target/110011
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000.cc (output_toc): Use its own mode of the
> 128-bit float constant for real_to_target call.
>

The comment could be worded better.  Maybe

Use the mode of the 128-bit floating constant itself for real_to_target
call.

This is okay.

Thanks, David


> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/pr110011.c: New test.
> ---
>  gcc/config/rs6000/rs6000.cc |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr110011.c | 42 +
>  2 files changed, 43 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr110011.c
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 3f129ea37d2..330c6a6fa5f 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -17314,7 +17314,7 @@ output_toc (FILE *file, rtx x, int labelno,
> machine_mode mode)
>if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
> REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
>else
> -   REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
> +   real_to_target (k, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
>
>if (TARGET_64BIT)
> {
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110011.c
> b/gcc/testsuite/gcc.target/powerpc/pr110011.c
> new file mode 100644
> index 000..5b04d3e298a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110011.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target float128_runtime } */
> +/* Force long double to be with IBM format here, to verify
> +   _Float128 constant still uses its own format (IEEE) for
> +   encoding rather than IBM format.  */
> +/* { dg-options "-mfp-in-toc -mabi=ibmlongdouble" } */
> +/* { dg-add-options float128 } */
> +
> +#define MPFR_FLOAT128_MAX 0x1.p+16383f128
> +
> +__attribute__ ((noipa))
> +_Float128 f128_max ()
> +{
> +  return MPFR_FLOAT128_MAX;
> +}
> +
> +typedef union
> +{
> +  int w[4];
> +  _Float128 f128;
> +} U;
> +
> +int main ()
> +{
> +
> +  U umax;
> +  umax.f128 = f128_max ();
> +  /* ieee float128 max:
> + 7ffe   .  */
> +  if (umax.w[1] != 0x || umax.w[2] != 0x)
> +__builtin_abort ();
> +#ifdef __LITTLE_ENDIAN__
> +  if (umax.w[0] != 0x || umax.w[3] != 0x7ffe)
> +__builtin_abort ();
> +#else
> +  if (umax.w[3] != 0x || umax.w[0] != 0x7ffe)
> +__builtin_abort ();
> +#endif
> +
> +  return 0;
> +}
> +
> --
> 2.31.1
>


Re: [PATCH 4/4] rs6000: build constant via li/lis;rldic

2023-06-10 Thread David Edelsohn via Gcc-patches
On Wed, Jun 7, 2023 at 9:56 PM Jiufu Guo  wrote:

> Hi,
>
> This patch checks whether a constant can be built by "li;rldic".
> We only need to take care of "negative li"; other forms do not need to
> be checked.
> For example, "negative lis" is just a "negative li" with an additional
> shift.
>
> Bootstrap and regtest pass on ppc64{,le}.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu)
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000.cc (can_be_built_by_li_and_rldic): New
> function.
> (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rldic.
>

This is okay.

Do you have any measurement of how expensive it is to test all of these
additional methods to generate a constant?  How much does this affect the
compile time?

Thanks, David



>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/const-build.c: Add more tests.
> ---
>  gcc/config/rs6000/rs6000.cc   | 61 ++-
>  .../gcc.target/powerpc/const-build.c  | 28 +
>  2 files changed, 88 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 2a3fa733b45..cd04b6b5c82 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10387,6 +10387,64 @@ can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT
> c, int *shift,
>return false;
>  }
>
> +/* Check if value C can be built by 2 instructions: one is 'li', another
> is
> +   rldic.
> +
> +   If so, *SHIFT is set to the 'shift' operand of rldic; and *MASK is set
> +   to the mask value about the 'mb' operand of rldic; and return true.
> +   Return false otherwise.  */
> +
> +static bool
> +can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT
> *mask)
> +{
> +  /* There are 49 successive ones in the negative value of 'li'.  */
> +  int ones = 49;
> +
> +  /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0:
> + right bits are shifted as 0's, and left 1's(and x's) are cleaned.  */
> +  int tz = ctz_hwi (c);
> +  int lz = clz_hwi (c);
> +  int middle_ones = clz_hwi (~(c << lz));
> +  if (tz + lz + middle_ones >= ones)
> +{
> +  *mask = ((1LL << (HOST_BITS_PER_WIDE_INT - tz - lz)) - 1LL) << tz;
> +  *shift = tz;
> +  return true;
> +}
> +
> +  /* 1..1xx1..1 --> 1..1xx0..01..1: some 1's(following x's) are cleaned.
> */
> +  int leading_ones = clz_hwi (~c);
> +  int tailing_ones = ctz_hwi (~c);
> +  int middle_zeros = ctz_hwi (c >> tailing_ones);
> +  if (leading_ones + tailing_ones + middle_zeros >= ones)
> +{
> +  *mask = ~(((1ULL << middle_zeros) - 1ULL) << tailing_ones);
> +  *shift = tailing_ones + middle_zeros;
> +  return true;
> +}
> +
> +  /* xx1..1xx: --> xx0..01..1xx: some 1's(following x's) are cleaned. */
> +  /* Get the position for the first bit of successive 1.
> + The 24th bit would be in successive 0 or 1.  */
> +  HOST_WIDE_INT low_mask = (1LL << 24) - 1LL;
> +  int pos_first_1 = ((c & (low_mask + 1)) == 0)
> + ? clz_hwi (c & low_mask)
> + : HOST_BITS_PER_WIDE_INT - ctz_hwi (~(c | low_mask));
> +  middle_ones = clz_hwi (~c << pos_first_1);
> +  middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
> +  if (pos_first_1 < HOST_BITS_PER_WIDE_INT
> +  && middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
> +  && middle_ones + middle_zeros >= ones)
> +{
> +  *mask = ~(((1ULL << middle_zeros) - 1LL)
> +   << (HOST_BITS_PER_WIDE_INT - pos_first_1));
> +  *shift = HOST_BITS_PER_WIDE_INT - pos_first_1 + middle_zeros;
> +  return true;
> +}
> +
> +  return false;
> +}
> +
>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
> Output insns to set DEST equal to the constant C as a series of
> lis, ori and shl instructions.  */
> @@ -10435,7 +10493,8 @@ rs6000_emit_set_long_const (rtx dest,
> HOST_WIDE_INT c)
>  }
>else if (can_be_built_by_li_lis_and_rotldi (c, , )
>|| can_be_built_by_li_lis_and_rldicl (c, , )
> -  || can_be_built_by_li_lis_and_rldicr (c, , ))
> +  || can_be_built_by_li_lis_and_rldicr (c, , )
> +  || can_be_built_by_li_and_rldic (c, , ))
>  {
>temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>unsigned HOST_WIDE_INT imm = (c | ~mask);
> diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c
> b/gcc/testsuite/gcc.target/powerpc/const-build.c
> index 8c209921d41..b503ee31c7c 100644
> --- a/gcc/testsuite/gcc.target/powerpc/const-build.c
> +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c
> @@ -82,6 +82,29 @@ lis_rldicr_12 (void)
>return 0x5310LL;
>  }
>
> +long long NOIPA
> +li_rldic_13 (void)
> +{
> +  return 0x000f8531LL;
> +}
> +long long NOIPA
> +li_rldic_14 (void)
> +{
> +  return 0x853100ffLL;
> +}
> +
> +long long NOIPA
> +li_rldic_15 (void)
> +{
> +  return 0x8031LL;
> +}
> +
> +long long NOIPA
> +li_rldic_16 (void)
> +{
> 

Re: [PATCH 3/4] rs6000: build constant via li/lis;rldicl/rldicr

2023-06-10 Thread David Edelsohn via Gcc-patches
On Wed, Jun 7, 2023 at 9:56 PM Jiufu Guo  wrote:

> Hi,
>
> This patch checks whether a constant can be obtained by clearing the
> left/right bits of a rotated negative "li/lis" value.  If so, we can build
> the constant through "li/lis ; rldicl/rldicr".
>
> Bootstrap and regtest pass on ppc64{,le}.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu)
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rldicl): New
> function.
> (can_be_built_by_li_lis_and_rldicr): New function.
> (rs6000_emit_set_long_const): Call
> can_be_built_by_li_lis_and_rldicr and
> can_be_built_by_li_lis_and_rldicl.
>

This is okay.  See below.

Thanks, David



>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/const-build.c: Add more tests.
> ---
>  gcc/config/rs6000/rs6000.cc   | 61 ++-
>  .../gcc.target/powerpc/const-build.c  | 44 +
>  2 files changed, 104 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 03cd9d5e952..2a3fa733b45 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10332,6 +10332,61 @@ can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT
> c, int *shift,
>return false;
>  }
>
> +/* Check if value C can be built by 2 instructions: one is 'li or lis',
> +   another is rldicl.
> +
> +   If so, *SHIFT is set to the shift operand of rldicl, and *MASK is set
> to
> +   the mask operand of rldicl, and return true.
> +   Return false otherwise.  */
> +
> +static bool
> +can_be_built_by_li_lis_and_rldicl (HOST_WIDE_INT c, int *shift,
> +  HOST_WIDE_INT *mask)
> +{
> +  /* Leading zeros may be cleaned by rldicl with a mask.  Change leading
> zeros
> + to ones and then recheck it.  */
> +  int lz = clz_hwi (c);
> +  HOST_WIDE_INT unmask_c
> += c | (HOST_WIDE_INT_M1U << (HOST_BITS_PER_WIDE_INT - lz));
> +  int n;
> +  if (can_be_rotated_to_negative_li (unmask_c, )
>

using can_be_rotated_to_lowbits (~unmask_c, 15, &n)

Maybe Segher would want the abstraction, but it seems more wasteful to me.


> +  || can_be_rotated_to_negative_lis (unmask_c, ))
> +{
> +  *mask = HOST_WIDE_INT_M1U >> lz;
> +  *shift = n == 0 ? 0 : HOST_BITS_PER_WIDE_INT - n;
> +  return true;
> +}
> +
> +  return false;
> +}
> +
> +/* Check if value C can be built by 2 instructions: one is 'li or lis',
> +   another is rldicr.
> +
> +   If so, *SHIFT is set to the shift operand of rldicr, and *MASK is set
> to
> +   the mask operand of rldicr, and return true.
> +   Return false otherwise.  */
> +
> +static bool
> +can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c, int *shift,
> +  HOST_WIDE_INT *mask)
> +{
> +  /* Tailing zeros may be cleaned by rldicr with a mask.  Change tailing
> zeros
> + to ones and then recheck it.  */
> +  int tz = ctz_hwi (c);
> +  HOST_WIDE_INT unmask_c = c | ((HOST_WIDE_INT_1U << tz) - 1);
> +  int n;
> +  if (can_be_rotated_to_negative_li (unmask_c, )
> +  || can_be_rotated_to_negative_lis (unmask_c, ))
> +{
> +  *mask = HOST_WIDE_INT_M1U << tz;
> +  *shift = HOST_BITS_PER_WIDE_INT - n;
> +  return true;
> +}
> +
> +  return false;
> +}
> +
>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
> Output insns to set DEST equal to the constant C as a series of
> lis, ori and shl instructions.  */
> @@ -10378,7 +10433,9 @@ rs6000_emit_set_long_const (rtx dest,
> HOST_WIDE_INT c)
>emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>  GEN_INT ((ud2 ^ 0x) << 16)));
>  }
> -  else if (can_be_built_by_li_lis_and_rotldi (c, , ))
> +  else if (can_be_built_by_li_lis_and_rotldi (c, , )
> +  || can_be_built_by_li_lis_and_rldicl (c, , )
> +  || can_be_built_by_li_lis_and_rldicr (c, , ))
>  {
>temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>unsigned HOST_WIDE_INT imm = (c | ~mask);
> @@ -10387,6 +10444,8 @@ rs6000_emit_set_long_const (rtx dest,
> HOST_WIDE_INT c)
>emit_move_insn (temp, GEN_INT (imm));
>if (shift != 0)
> temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
> +  if (mask != HOST_WIDE_INT_M1)
> +   temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
>emit_move_insn (dest, temp);
>  }
>else if (ud3 == 0 && ud4 == 0)
> diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c
> b/gcc/testsuite/gcc.target/powerpc/const-build.c
> index c38a1dd91f2..8c209921d41 100644
> --- a/gcc/testsuite/gcc.target/powerpc/const-build.c
> +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c
> @@ -46,6 +46,42 @@ lis_rotldi_6 (void)
>return 0x5318LL;
>  }
>
> +long long NOIPA
> +li_rldicl_7 (void)
> +{
> +  return 0x3ffa1LL;
> +}
> +
> +long long NOIPA
> +li_rldicl_8 (void)
> +{
> +  return 0xff8531LL;
> +}
> +
> 

Re: [PATCH 2/4] rs6000: build constant via lis;rotldi

2023-06-10 Thread David Edelsohn via Gcc-patches
On Wed, Jun 7, 2023 at 9:55 PM Jiufu Guo  wrote:

> Hi,
>
> This patch checks whether a constant can be rotated to/from a negative
> value loadable by "lis".  If so, we could use "lis;rotldi" to build it.
> The positive value of "lis" does not need to be analyzed.  Because if a
> constant can be rotated from the positive value of "lis", it also can be
> rotated from a positive value of "li".
>
> Bootstrap and regtest pass on ppc64{,le}.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu)
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000.cc (can_be_rotated_to_negative_lis): New
> function.
> (can_be_built_by_li_and_rotldi): Rename to ...
> (can_be_built_by_li_lis_and_rotldi): ... this function.
> (rs6000_emit_set_long_const): Call
> can_be_built_by_li_lis_and_rotldi.
>

This patch is okay.

Thanks, David


>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/const-build.c: Add more tests.
> ---
>  gcc/config/rs6000/rs6000.cc   | 42 ---
>  .../gcc.target/powerpc/const-build.c  | 16 ++-
>  2 files changed, 52 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 1dd0072350a..03cd9d5e952 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10278,19 +10278,51 @@ can_be_rotated_to_negative_li (HOST_WIDE_INT c,
> int *rot)
>return can_be_rotated_to_lowbits (~c, 15, rot);
>  }
>
> -/* Check if value C can be built by 2 instructions: one is 'li', another
> is
> -   rotldi.
> +/* Check if C can be rotated to a negative value which 'lis' instruction
> is
> +   able to load: 1..1xx0..0.  If so, set *ROT to the number by which C is
> +   rotated, and return true.  Return false otherwise.  */
> +
> +static bool
> +can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
> +{
> +  /* case a. 1..1xxx0..01..1: up to 15 x's, at least 16 0's.  */
> +  int leading_ones = clz_hwi (~c);
> +  int tailing_ones = ctz_hwi (~c);
> +  int middle_zeros = ctz_hwi (c >> tailing_ones);
> +  if (middle_zeros >= 16 && leading_ones + tailing_ones >= 33)
> +{
> +  *rot = HOST_BITS_PER_WIDE_INT - tailing_ones;
> +  return true;
> +}
> +
> +  /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
> + rotated over the highest bit.  */
> +  int pos_one = clz_hwi ((c << 16) >> 16);
> +  middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
> +  int middle_ones = clz_hwi (~(c << pos_one));
> +  if (middle_zeros >= 16 && middle_ones >= 33)
> +{
> +  *rot = pos_one;
> +  return true;
> +}
> +
> +  return false;
> +}
> +
> +/* Check if value C can be built by 2 instructions: one is 'li or lis',
> +   another is rotldi.
>
> If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
> is set to -1, and return true.  Return false otherwise.  */
>
>  static bool
> -can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
> +can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT c, int *shift,
>HOST_WIDE_INT *mask)
>  {
>int n;
>if (can_be_rotated_to_positive_li (c, )
> -  || can_be_rotated_to_negative_li (c, ))
> +  || can_be_rotated_to_negative_li (c, )
> +  || can_be_rotated_to_negative_lis (c, ))
>  {
>*mask = HOST_WIDE_INT_M1;
>*shift = HOST_BITS_PER_WIDE_INT - n;
> @@ -10346,7 +10378,7 @@ rs6000_emit_set_long_const (rtx dest,
> HOST_WIDE_INT c)
>emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>  GEN_INT ((ud2 ^ 0x) << 16)));
>  }
> -  else if (can_be_built_by_li_and_rotldi (c, , ))
> +  else if (can_be_built_by_li_lis_and_rotldi (c, , ))
>  {
>temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>unsigned HOST_WIDE_INT imm = (c | ~mask);
> diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c
> b/gcc/testsuite/gcc.target/powerpc/const-build.c
> index 70f095f6bf2..c38a1dd91f2 100644
> --- a/gcc/testsuite/gcc.target/powerpc/const-build.c
> +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c
> @@ -34,14 +34,28 @@ li_rotldi_4 (void)
>return 0x2194LL;
>  }
>
> +long long NOIPA
> +lis_rotldi_5 (void)
> +{
> +  return 0x8531LL;
> +}
> +
> +long long NOIPA
> +lis_rotldi_6 (void)
> +{
> +  return 0x5318LL;
> +}
> +
>  struct fun arr[] = {
>{li_rotldi_1, 0x75310LL},
>{li_rotldi_2, 0x2164LL},
>{li_rotldi_3, 0x8531LL},
>{li_rotldi_4, 0x2194LL},
> +  {lis_rotldi_5, 0x8531LL},
> +  {lis_rotldi_6, 0x5318LL},
>  };
>
> -/* { dg-final { scan-assembler-times {\mrotldi\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mrotldi\M} 6 } } */
>
>  int
>  main ()
> --
> 2.39.1
>
>


Re: [PATCH 1/4] rs6000: build constant via li;rotldi

2023-06-10 Thread David Edelsohn via Gcc-patches
On Wed, Jun 7, 2023 at 9:55 PM Jiufu Guo  wrote:

> Hi,
>
> This patch checks whether a constant can be rotated to/from a positive
> or negative value loadable by "li". If so, we could use "li;rotldi" to build it.
>
> Bootstrap and regtest pass on ppc64{,le}.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu)
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000.cc (can_be_rotated_to_positive_li): New
> function.
> (can_be_rotated_to_negative_li): New function.
> (can_be_built_by_li_and_rotldi): New function.
> (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/const-build.c: New test.
> ---
>  gcc/config/rs6000/rs6000.cc   | 64 +--
>  .../gcc.target/powerpc/const-build.c  | 54 
>  2 files changed, 112 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 42f49e4a56b..1dd0072350a 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10258,6 +10258,48 @@ rs6000_emit_set_const (rtx dest, rtx source)
>return true;
>  }
>
> +/* Check if C can be rotated to a positive value which 'li' instruction
> +   is able to load.  If so, set *ROT to the number by which C is rotated,
> +   and return true.  Return false otherwise.  */
> +
> +static bool
> +can_be_rotated_to_positive_li (HOST_WIDE_INT c, int *rot)
> +{
> +  /* 49 leading zeros and 15 low bits on the positive value
> + generated by 'li' instruction.  */
> +  return can_be_rotated_to_lowbits (c, 15, rot);
> +}
> +
> +/* Like can_be_rotated_to_positive_li, but check the negative value of
> 'li'.  */
> +
> +static bool
> +can_be_rotated_to_negative_li (HOST_WIDE_INT c, int *rot)
> +{
> +  return can_be_rotated_to_lowbits (~c, 15, rot);
> +}
> +
> +/* Check if value C can be built by 2 instructions: one is 'li', another
> is
> +   rotldi.
> +
> +   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
> +   is set to -1, and return true.  Return false otherwise.  */
> +
>

I look at this feature and it's good, but I don't fully understand the
benefit of this level of abstraction.  Ideally all of the above functions
would be inlined.  They aren't reused.


> +static bool
> +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
> +  HOST_WIDE_INT *mask)
> +{
> +  int n;
> +  if (can_be_rotated_to_positive_li (c, )
> +  || can_be_rotated_to_negative_li (c, ))
>

Why not

/* Check if C or ~C can be rotated to a positive or negative value
which 'li' instruction is able to load.  */
if (can_be_rotated_to_lowbits (c, 15, &n)
|| can_be_rotated_to_lowbits (~c, 15, &n))
...

This is a style of software engineering, but it seems overkill to me when
the function is a single line that tail calls another function.  Am I
missing something?

The rest of this patch looks good.

Thanks, David


> +{
> +  *mask = HOST_WIDE_INT_M1;
> +  *shift = HOST_BITS_PER_WIDE_INT - n;
> +  return true;
> +}
> +
> +  return false;
> +}
> +
>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
> Output insns to set DEST equal to the constant C as a series of
> lis, ori and shl instructions.  */
> @@ -10266,15 +10308,14 @@ static void
>  rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>  {
>rtx temp;
> +  int shift;
> +  HOST_WIDE_INT mask;
>HOST_WIDE_INT ud1, ud2, ud3, ud4;
>
>ud1 = c & 0x;
> -  c = c >> 16;
> -  ud2 = c & 0x;
> -  c = c >> 16;
> -  ud3 = c & 0x;
> -  c = c >> 16;
> -  ud4 = c & 0x;
> +  ud2 = (c >> 16) & 0x;
> +  ud3 = (c >> 32) & 0x;
> +  ud4 = (c >> 48) & 0x;
>
>if ((ud4 == 0x && ud3 == 0x && ud2 == 0x && (ud1 & 0x8000))
>|| (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
> @@ -10305,6 +10346,17 @@ rs6000_emit_set_long_const (rtx dest,
> HOST_WIDE_INT c)
>emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>  GEN_INT ((ud2 ^ 0x) << 16)));
>  }
> +  else if (can_be_built_by_li_and_rotldi (c, , ))
> +{
> +  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +  unsigned HOST_WIDE_INT imm = (c | ~mask);
> +  imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
> +
> +  emit_move_insn (temp, GEN_INT (imm));
> +  if (shift != 0)
> +   temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
> +  emit_move_insn (dest, temp);
> +}
>else if (ud3 == 0 && ud4 == 0)
>  {
>temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c
> b/gcc/testsuite/gcc.target/powerpc/const-build.c
> new file mode 100644
> index 000..70f095f6bf2
> --- /dev/null
> +++ 

[PATCH v1] RISC-V: Support RVV FP16 MISC vlmul ext intrinsic API

2023-06-10 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch supports the intrinsic API of FP16 ZVFHMIN vlmul ext. Aka:

vfloat16*_t <==> vfloat16*_t.

From the user's perspective, it is reasonable to do some type conversion
between vfloat16*_t and vfloat16*_t when only ZVFHMIN is enabled.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-types.def
(vfloat16mf4_t): Add type to X2/X4/X8/X16/X32 vlmul ext ops.
(vfloat16mf2_t): Ditto.
(vfloat16m1_t): Ditto.
(vfloat16m2_t): Ditto.
(vfloat16m4_t): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c: Add new test cases.
* gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: Add new test cases.
---
 .../riscv/riscv-vector-builtins-types.def | 15 ++
 .../riscv/rvv/base/zvfh-over-zvfhmin.c| 18 +--
 .../riscv/rvv/base/zvfhmin-intrinsic.c| 54 +--
 3 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
b/gcc/config/riscv/riscv-vector-builtins-types.def
index 589ea532727..db8e61fea6a 100644
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -978,6 +978,11 @@ DEF_RVV_X2_VLMUL_EXT_OPS (vuint32m4_t, 0)
 DEF_RVV_X2_VLMUL_EXT_OPS (vuint64m1_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_X2_VLMUL_EXT_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_X2_VLMUL_EXT_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16m2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16m4_t, RVV_REQUIRE_ELEN_FP_16)
 DEF_RVV_X2_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_X2_VLMUL_EXT_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32)
 DEF_RVV_X2_VLMUL_EXT_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32)
@@ -1014,6 +1019,10 @@ DEF_RVV_X4_VLMUL_EXT_OPS (vuint32m1_t, 0)
 DEF_RVV_X4_VLMUL_EXT_OPS (vuint32m2_t, 0)
 DEF_RVV_X4_VLMUL_EXT_OPS (vuint64m1_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_X4_VLMUL_EXT_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16m2_t, RVV_REQUIRE_ELEN_FP_16)
 DEF_RVV_X4_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_X4_VLMUL_EXT_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32)
 DEF_RVV_X4_VLMUL_EXT_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32)
@@ -1040,6 +1049,9 @@ DEF_RVV_X8_VLMUL_EXT_OPS (vuint16m1_t, 0)
 DEF_RVV_X8_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_X8_VLMUL_EXT_OPS (vuint32m1_t, 0)
 DEF_RVV_X8_VLMUL_EXT_OPS (vuint64m1_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X8_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_X8_VLMUL_EXT_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_X8_VLMUL_EXT_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16)
 DEF_RVV_X8_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_X8_VLMUL_EXT_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32)
 DEF_RVV_X8_VLMUL_EXT_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
@@ -1056,6 +1068,8 @@ DEF_RVV_X16_VLMUL_EXT_OPS (vuint8mf2_t, 0)
 DEF_RVV_X16_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_X16_VLMUL_EXT_OPS (vuint16mf2_t, 0)
 DEF_RVV_X16_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_X16_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_X16_VLMUL_EXT_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16)
 DEF_RVV_X16_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_MIN_VLEN_64)
 
 DEF_RVV_X32_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64)
@@ -1064,6 +1078,7 @@ DEF_RVV_X32_VLMUL_EXT_OPS (vint16mf4_t, 
RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_X32_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_X32_VLMUL_EXT_OPS (vuint8mf4_t, 0)
 DEF_RVV_X32_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_X32_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
RVV_REQUIRE_MIN_VLEN_64)
 
 DEF_RVV_X64_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_X64_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c
index d5bcdd5156a..ff9e0156a68 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c
@@ -37,13 +37,23 @@ vuint16m8_t test_vreinterpret_v_f16m8_u16m8(vfloat16m8_t 
src) {
   return __riscv_vreinterpret_v_f16m8_u16m8(src);
 }
 
+vfloat16mf2_t 

RE: [PATCH V3] RISC-V: Rework Phase 5 && Phase 6 of VSETVL PASS

2023-06-10 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Sunday, June 11, 2023 12:49 AM
To: juzhe.zh...@rivai.ai; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; pal...@rivosinc.com; rdapp@gmail.com
Subject: Re: [PATCH V3] RISC-V: Rework Phase 5 && Phase 6 of VSETVL PASS



On 6/9/23 17:11, juzhe.zh...@rivai.ai wrote:
> From: Juzhe-Zhong 
> 
> Address comments from Jeff.
> 
> This patch is to rework Phase 5 && Phase 6 of VSETVL PASS since Phase 
> 5 && Phase 6 are quite messy and cause some bugs discovered by my 
> downstream auto-vectorization test-generator.
> 
> Before this patch.
> 
> Phase 5 is cleanup_insns, the function that removes the AVL operand dependency
> from each RVV instruction.
> E.g. vadd.vv (use a5), after Phase 5, ==> vadd.vv (use const_int 0).
> Since "a5" is used in "vsetvl" instructions, once the correct
> "vsetvl" instructions are inserted, each RVV instruction doesn't need the AVL
> operand "a5" anymore. Removing this operand dependency helps the
> following scheduling PASS.
> 
> Phase 6 is propagate_avl, which does the following 2 things:
> 1. Local && Global user vsetvl instructions optimization.
>    E.g.
>    vsetvli a2, a2, e8, mf8    ==> Change it into vsetvli a2, a2, e32, mf2
>    vsetvli zero,a2, e32, mf2  ==> eliminate
> 2. Optimize user vsetvl from "vsetvl a2,a2" into "vsetvl zero,a2" if "a2" is
>    not used by any instructions.
> Since Phase 1 ~ Phase 4 insert "vsetvli" instructions based on LCM, which
> changes the CFG, I re-new the RTL_SSA framework (which is more
> expensive than just using DF) for Phase 6 and optimize user vsetvli based on
> the new RTL_SSA.
> 
> There are 2 issues in Phase 5 && Phase 6:
> 1. local_eliminate_vsetvl_insn was introduced by @kito; it does local user
> vsetvl optimizations better than Phase 6 does, and such an approach doesn't
> need to re-new the RTL_SSA framework.
> So the local user vsetvli instructions optimization
> in Phase 6 is redundant and should be removed.
> 2. A bug discovered by my downstream auto-vectorization test-generator (I
> can't put the test in this patch since we are missing autovec
> patterns for it, so we can't use the upstream GCC to directly reproduce such
> an issue, but I will remember to put it back after I support the
> necessary autovec patterns). This bug is caused by using the re-newed RTL_SSA
> framework. The issue description is this:
> 
> Before Phase 6:
> ...
> insn1: vsetvli a3, 17 <== generated by SELECT_VL auto-vec pattern.
> slli a4,a3,3
> ...
> insn2: vsetvli zero, a3, ...
> load (use const_int 0, before Phase 5, it's using a3, but the use of "a3" 
> is removed in Phase 5)
> ...
> 
> In Phase 6, we iterate to insn2, then get the def of "a3", which is insn1.
> insn2 is the vsetvli instruction inserted in Phase 4, which is not
> included in the RTL_SSA framework even though we renew it (I didn't take a
> look at it and I don't think we need to now).
> Based on this situation, the def_info of insn2 has the information
> "set->single_nondebug_insn_use ()"
> which returns true. Obviously, this information is not correct, since insn1
> has at least 2 uses:
> 1). slli a4,a3,3 2). insn2: vsetvli zero, a3, ... As a result, the execution
> test generated by my downstream test-generator failed.
> 
> Conclusion of RTL_SSA framework:
> Before this patch, we initialized RTL_SSA 2 times. One is at the
> beginning of the VSETVL PASS, which is absolutely correct; the other is the
> re-new after Phase 4 (LCM), which has incorrect information that causes bugs.
> 
> Besides, we don't like to initialize RTL_SSA a second time; it seems to be a
> waste since we just need to do a little optimization.
> 
> Based on all the circumstances I described above, I rework and reorganize
> Phase 5 && Phase 6 as follows:
> 1. Phase 5 is called ssa_post_optimization, which does the optimization
> based on the RTL_SSA information (the RTL_SSA is initialized
> at the beginning of the VSETVL PASS, no need to re-new it again). This
> phase includes 3 optimizations:
> 1). local_eliminate_vsetvl_insn we already have (no change).
> 2). global_eliminate_vsetvl_insn ---> new optimization split from the
> original Phase 6 but with a more powerful and reliable implementation.
>E.g.
>void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
>  size_t avl;
>  if (m > 100)
>avl = __riscv_vsetvl_e16mf4(vl << 4);
>  else
>avl = __riscv_vsetvl_e32mf2(vl >> 8);
>  for (size_t i = 0; i < m; i++) {
>vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i, avl);
>v0 = __riscv_vadd_vv_i8mf8 (v0, v0, avl);
>__riscv_vse8_v_i8mf8(out + i, v0, avl);
>  }
>}
> 
>This example failed to get the global user vsetvl optimization before this patch:
>f:
>li  a5,100
>   

[GCC 13 PATCH] PR target/109973: CCZmode and CCCmode variants of [v]ptest.

2023-06-10 Thread Roger Sayle

This is a backport of the fixes for PR target/109973 and PR target/110083.

This backport to the releases/gcc-13 branch has been tested on
x86_64-pc-linux-gnu with make bootstrap and make -k check, both with and
without --target_board=unix{-m32} with no new failures.  Ok for gcc-13,
or should we just close PR 109973 in Bugzilla?


2023-06-10  Roger Sayle  
Uros Bizjak  

gcc/ChangeLog
PR target/109973
PR target/110083
* config/i386/i386-builtin.def (__builtin_ia32_ptestz128): Use new
CODE_for_sse4_1_ptestzv2di.
(__builtin_ia32_ptestc128): Use new CODE_for_sse4_1_ptestcv2di.
(__builtin_ia32_ptestz256): Use new CODE_for_avx_ptestzv4di.
(__builtin_ia32_ptestc256): Use new CODE_for_avx_ptestcv4di.
* config/i386/i386-expand.cc (ix86_expand_branch): Use CCZmode
when expanding UNSPEC_PTEST to compare against zero.
* config/i386/i386-features.cc (scalar_chain::convert_compare):
Likewise generate CCZmode UNSPEC_PTESTs when converting comparisons.
Update or delete REG_EQUAL notes, converting CONST_INT and
CONST_WIDE_INT immediate operands to a suitable CONST_VECTOR.
(general_scalar_chain::convert_insn): Use CCZmode for COMPARE
result.
(timode_scalar_chain::convert_insn): Use CCZmode for COMPARE result.
* config/i386/i386-protos.h (ix86_match_ptest_ccmode): Prototype.
* config/i386/i386.cc (ix86_match_ptest_ccmode): New predicate to
check for suitable matching modes for the UNSPEC_PTEST pattern.
* config/i386/sse.md (define_split): When splitting UNSPEC_MOVMSK
to UNSPEC_PTEST, preserve the FLAG_REG mode as CCZ. 
(*_ptest): Add asterisk to hide define_insn.  Remove
":CC" mode of FLAGS_REG, instead use ix86_match_ptest_ccmode.
(_ptestz): New define_expand to specify CCZ.
(_ptestc): New define_expand to specify CCC.
(_ptest): A define_expand using CC to preserve the
current behavior.
(*ptest_and): Specify CCZ to only perform this optimization
when only the Z flag is required.

gcc/testsuite/ChangeLog
PR target/109973
PR target/110083
* gcc.target/i386/pr109973-1.c: New test case.
* gcc.target/i386/pr109973-2.c: Likewise.
* gcc.target/i386/pr110083.c: Likewise.


Thanks,
Roger
--

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 6dae697..37df018 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1004,8 +1004,8 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, 
CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv4sf2, 
"__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) 
V4SF_FTYPE_V4SF)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv4sf2_sfix, 
"__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) 
V4SI_FTYPE_V4SF)
 
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, 
"__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) 
INT_FTYPE_V2DI_V2DI_PTEST)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, 
"__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) 
INT_FTYPE_V2DI_V2DI_PTEST)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestzv2di, 
"__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) 
INT_FTYPE_V2DI_V2DI_PTEST)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestcv2di, 
"__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) 
INT_FTYPE_V2DI_V2DI_PTEST)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, 
"__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) 
INT_FTYPE_V2DI_V2DI_PTEST)
 
 /* SSE4.2 */
@@ -1164,8 +1164,8 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vtestpd256, 
"__builtin_ia32_vtestnzc
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vtestps256, 
"__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) 
INT_FTYPE_V8SF_V8SF_PTEST)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vtestps256, 
"__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) 
INT_FTYPE_V8SF_V8SF_PTEST)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vtestps256, 
"__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) 
INT_FTYPE_V8SF_V8SF_PTEST)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestv4di, 
"__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) 
INT_FTYPE_V4DI_V4DI_PTEST)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestv4di, 
"__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) 
INT_FTYPE_V4DI_V4DI_PTEST)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestzv4di, 
"__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) 
INT_FTYPE_V4DI_V4DI_PTEST)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestcv4di, 
"__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) 
INT_FTYPE_V4DI_V4DI_PTEST)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestv4di, 
"__builtin_ia32_ptestnzc256", 

[PATCH] libstdc++: Clarify manual demangle doc

2023-06-10 Thread Jonny Grant
libstdc++-v3/ChangeLog:

* doc/xml/manual/extensions.xml: Remove demangle exception description 
and include.

---
 libstdc++-v3/doc/xml/manual/extensions.xml | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/doc/xml/manual/extensions.xml 
b/libstdc++-v3/doc/xml/manual/extensions.xml
index daa98f5cba7..d4fe2f509d4 100644
--- a/libstdc++-v3/doc/xml/manual/extensions.xml
+++ b/libstdc++-v3/doc/xml/manual/extensions.xml
@@ -514,12 +514,10 @@ get_temporary_buffer(5, (int*)0);
 you won't notice.)
   
   
-Probably the only times you'll be interested in demangling at runtime
-are when you're seeing typeid strings in RTTI, or when
-you're handling the runtime-support exception classes.  For example:
+Probably the only time you'll be interested in demangling at runtime
+is when you're seeing typeid strings in RTTI.  For example:
   

-#include exception
 #include iostream
 #include cstdlib
 #include cxxabi.h
-- 
2.37.2


[c++] Implement DR 976

2023-06-10 Thread Nathan Sidwell via Gcc-patches
DR 976 affects conversion operator deduction, swapping reference stripping and 
cv-qual removal.  This allows 'Y::operator P const & ()' to deduce T against a 
call wanting plain A  (previously that would fail as 'P const' cannot be deduced 
from 'A').


It also affects deductions for array- or function-producing conversions, which I 
suspect is rarer.


pushed to trunk

nathan
--
Nathan Sidwell
From 80f075b410125bddb31459428760645baba1a69f Mon Sep 17 00:00:00 2001
From: Nathan Sidwell 
Date: Sat, 10 Jun 2023 12:42:17 -0400
Subject: [PATCH] c++: Adjust conversion deduction [PR61663][DR976]

Drop the return type's reference before doing cvqual and related decays.

	gcc/cp/
	PR c++/61663
	* pt.cc (maybe_adjust_types_for_deduction): Implement DR976.
	gcc/testsuite/
	* g++.dg/template/pr61663.C: New.
---
 gcc/cp/pt.cc| 11 +++--
 gcc/testsuite/g++.dg/template/pr61663.C | 63 +
 2 files changed, 69 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/pr61663.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 6b20c58ce66..6a2cf2c123f 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -22725,10 +22725,16 @@ maybe_adjust_types_for_deduction (tree tparms,
   break;
 
 case DEDUCE_CONV:
+  /* [temp.deduct.conv] First remove a reference type on parm.
+	 DRs 322 & 976 affected this.  */
+  if (TYPE_REF_P (*parm))
+	*parm = TREE_TYPE (*parm);
+
   /* Swap PARM and ARG throughout the remainder of this
 	 function; the handling is precisely symmetric since PARM
 	 will initialize ARG rather than vice versa.  */
   std::swap (parm, arg);
+
   break;
 
 case DEDUCE_EXACT:
@@ -22795,11 +22801,6 @@ maybe_adjust_types_for_deduction (tree tparms,
   result |= UNIFY_ALLOW_OUTER_MORE_CV_QUAL;
 }
 
-  /* DR 322. For conversion deduction, remove a reference type on parm
- too (which has been swapped into ARG).  */
-  if (strict == DEDUCE_CONV && TYPE_REF_P (*arg))
-*arg = TREE_TYPE (*arg);
-
   return result;
 }
 
diff --git a/gcc/testsuite/g++.dg/template/pr61663.C b/gcc/testsuite/g++.dg/template/pr61663.C
new file mode 100644
index 000..2964fa6c309
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/pr61663.C
@@ -0,0 +1,63 @@
+// { dg-do compile { target c++11 } }
+// PR c++/61663
+// DR 976, strip ref from conv op return type before doing
+// fn and ary decay or CV qual removal
+
+struct F 
+{
+  template
+  operator const T&();
+};
+
+void Foo () 
+{
+  F f;
+  int i = f;
+}
+
+template struct X {};
+
+struct Y
+{
+  template operator X () &&; // #3
+  template operator X const & () const &; // #4
+};
+
+void Use (X);
+Y Val ();
+Y const  ();
+
+// { dg-final { scan-assembler "_Z5Frob3v:.*_ZNO1Ycv1XIT_EIvEEv.*_Z3Use1XIvE" } }
+void Frob3 ()
+{
+  Use (Val ()); // #3
+}
+
+// { dg-final { scan-assembler "_Z5Frob4v:.*_ZNKR1YcvRK1XIT_EIvEEv.*_Z3Use1XIvE" } }
+void Frob4 ()
+{
+  Use (Ref ()); // #4
+}
+
+struct Z 
+{
+  template using FnRef = void (&) (T);
+  template using AryRef = T (&)[];
+
+  template operator FnRef ();
+  template operator AryRef ();
+};
+
+// { dg-final { scan-assembler "_Z5Frob5R1Z:.*_ZN1ZcvRFvT_EIiEEv.*_ZN1ZcvRA_T_IiEEv" } }
+void Frob5 (Z )
+{
+  void (*fnptr)(int) = z;
+  int *iptr = z;
+}
+
+// { dg-final { scan-assembler "_Z5Frob6R1Z:.*_ZN1ZcvRFvT_EIfEEv.*_ZN1ZcvRA_T_IfEEv" } }
+void Frob6 (Z )
+{
+  void ()(float) = z;
+  float ()[] = z;
+}
-- 
2.40.1



Re: [PATCH] Implement ipa_vr hashing.

2023-06-10 Thread Aldy Hernandez via Gcc-patches




On 5/29/23 16:51, Martin Jambor wrote:

Hi,

On Mon, May 22 2023, Aldy Hernandez via Gcc-patches wrote:

Implement hashing for ipa_vr.  When all is said and done, all these
patches incur a 7.64% slowdown for ipa-cp, which is entirely covered by
the similar 7% increase in this area last week.  So we get type agnostic
ranges with "infinite" range precision close to free.


Do you know why/where this slow-down happens?  Do we perhaps want to
limit the "infiniteness" a little somehow?


I addressed the slow down in another mail.



Also, jump functions live for a long time, have you looked at how memory
hungry they become?  I hope that the hashing would be good at preventing
any issues.


On a side-note, the caching does help.  On a (mistaken) hunch, I had
played around with removing caching for everything but UNDEFINED/VARYING 
and zero/nonzero to simplify things, but the cache hit ratio was still 
surprisingly high (+80%).  So good job there :-).




Generally, I think I am OK with the patches if the impact on memory is not
too bad, though I guess they depend on the one I looked at last week, so
we may focus on that one first.


I'm not sure whether this was an OK for the other patches, given you 
approved the first patch, so I'll hold off until you give the go-ahead.


Thanks.
Aldy



libgfortran: remove support for --enable-intermodule

2023-06-10 Thread FX Coudert via Gcc-patches
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109373
I don’t believe it is widely used, and it was removed from everywhere else in 
gcc.

Bootstrapped and regtested on x86_64-pc-linux-gnu.
OK to commit?

FX



0001-libgfortran-remove-support-for-enable-intermodule.patch
Description: Binary data


Re: [PATCH] Convert ipcp_vr_lattice to type agnostic framework.

2023-06-10 Thread Aldy Hernandez via Gcc-patches




On 6/10/23 10:49, Martin Jambor wrote:

Hi,

thanks for dealing with my requests.

On Wed, Jun 07 2023, Aldy Hernandez wrote:

On 5/26/23 18:17, Martin Jambor wrote:

Hello,

On Mon, May 22 2023, Aldy Hernandez wrote:

I've adjusted the patch with some minor cleanups that came up when I
implemented the rest of the IPA revamp.

Retested.  OK?

On Wed, May 17, 2023 at 4:31 PM Aldy Hernandez  wrote:


This converts the lattice to store ranges in Value_Range instead of
value_range (*) to make it type agnostic, and adjust all users
accordingly.

I think it is a good example on converting from static ranges to more
general, type agnostic ones.

I've been careful to make sure Value_Range never ends up on GC, since
it contains an int_range_max and can expand on-demand onto the heap.
Longer term storage for ranges should be done with vrange_storage, as
per the previous patch ("Provide an API for ipa_vr").

(*) I do know the Value_Range naming versus value_range is quite
annoying, but it was a judgement call last release for the eventual
migration to having "value_range" be a type agnostic range object.  We
will ultimately rename Value_Range to value_range.


[...]


diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index d4b9d4ac27e..bd5b1da17b2 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -343,20 +343,29 @@ private:
   class ipcp_vr_lattice
   {
   public:
-  value_range m_vr;
+  Value_Range m_vr;

 inline bool bottom_p () const;
 inline bool top_p () const;
-  inline bool set_to_bottom ();
-  bool meet_with (const value_range *p_vr);
+  inline bool set_to_bottom (tree type);


Requiring a type when setting a lattice to bottom makes for a weird
interface, can't we set the underlying Value_Range to whatever... >

+  bool meet_with (const vrange &p_vr);
 bool meet_with (const ipcp_vr_lattice &other);
-  void init () { gcc_assert (m_vr.undefined_p ()); }
+  void init (tree type);
 void print (FILE * f);

   private:
-  bool meet_with_1 (const value_range *other_vr);
+  bool meet_with_1 (const vrange &other_vr);
   };

+inline void
+ipcp_vr_lattice::init (tree type)
+{
+  if (type)
+m_vr.set_type (type);
+
+  // Otherwise m_vr will default to unsupported_range.


...this does?

All users of the lattice check it for not being bottom first, so it
should be safe.

If it is not possible for some reason, then I guess we should add a bool
flag to ipcp_vr_lattice instead, rather than looking up types of
unusable lattices.  ipcp_vr_lattices don't live for long.


The type was my least favorite part of this work.  And yes, your
suggestion would work.  I have tweaked the patch to force a VARYING for
an unsupported range which seems to do the trick.  It looks much
cleaner.  Thanks.


This version is much better indeed.

[...]


@@ -1912,29 +1917,33 @@ ipa_vr_operation_and_type_effects (value_range *dst_vr,
   return false;

 range_op_handler handler (operation, dst_type);
-  return (handler
- && handler.fold_range (*dst_vr, dst_type,
-*src_vr, value_range (dst_type))
- && !dst_vr->varying_p ()
- && !dst_vr->undefined_p ());
+  if (!handler)
+return false;
+
+  Value_Range varying (dst_type);
+  varying.set_varying (dst_type);
+
+  return (handler.fold_range (dst_vr, dst_type, src_vr, varying)
+ && !dst_vr.varying_p ()
+ && !dst_vr.undefined_p ());
   }

   /* Determine value_range of JFUNC given that INFO describes the caller node 
or
  the one it is inlined to, CS is the call graph edge corresponding to JFUNC
  and PARM_TYPE of the parameter.  */

-value_range
-ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
+void
+ipa_value_range_from_jfunc (vrange &vr,
+   ipa_node_params *info, cgraph_edge *cs,
  ipa_jump_func *jfunc, tree parm_type)


I assume that you decided to return the value in a parameter passed by
reference instead of in return value for a good reason but then can we
at least...


vrange is an abstract type, plus it can be any size (int_range<3> has 3
sub-ranges, legacy value_range has 2 sub-ranges, frange is a totally
different object, etc).  Throughout all of ranger, returning a range is
done by passing by reference.  This has the added benefit that sometimes
we can set a return range by twiddling a few bits (foo.set_undefined())
instead of having to copy a full range back and forth.



I see, thanks.

[...]



How's this?


One minor observation below...



Aldy
 From 2fd0ae47aa094675a02763e72d7bb7404ed9334b Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Wed, 17 May 2023 11:29:34 +0200
Subject: [PATCH] Convert ipcp_vr_lattice to type agnostic framework.

This converts the lattice to store ranges in Value_Range instead of
value_range (*) to make it type agnostic, and adjust all users
accordingly.

I've been careful to make sure Value_Range never ends up on GC, since
it contains an int_range_max and can expand on-demand onto the heap.
Longer 

gcc/config.in was not regenerated

2023-06-10 Thread FX Coudert via Gcc-patches
Hi,

Building GCC in maintainer mode leads to changes in gcc/config.in 
:

> diff --git a/gcc/config.in b/gcc/config.in
> index 4cad077bfbe..25442c59aec 100644
> --- a/gcc/config.in
> +++ b/gcc/config.in
> @@ -67,6 +67,12 @@
>  #endif
> +/* Define to larger than one set the number of match.pd partitions to 
> make. */
> +#ifndef USED_FOR_TARGET
> +#undef DEFAULT_MATCHPD_PARTITIONS
> +#endif
> +
> +
>  /* Define to larger than zero set the default stack clash protector size. */
>  #ifndef USED_FOR_TARGET
>  #undef DEFAULT_STK_CLASH_GUARD_SIZE

which I think are because this commit 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0a85544e1aaeca41133ecfc438cda913dbc0f122
should have regenerated and committed config.in 

Christina, can you please have a look?

FX

Re: Splitting up 27_io/basic_istream/ignore/wchar_t/94749.cc (takes too long)

2023-06-10 Thread Mike Stump via Gcc-patches
On Jun 9, 2023, at 2:47 PM, Bernhard Reutner-Fischer  
wrote:
> 
> On 9 June 2023 19:18:45 CEST, Mike Stump via Gcc-patches 
>  wrote:
>> simulation ports.  Maybe a 20-100x speedup? If you want to go this way I'd 
>> say do it in python at the bottom as it would be nice to switch over to 
>> python in the next 5-20 years and away from tcl.
> 
> Yes, i guess we have all pondered this for way long enough, but it is a lot 
> of work still.
> 
> The nice side effect would be that we see such hogs easier and earlier, at 
> least more comfortable. But well. Either way, what should we do about remote 
> env, if there is one? If the target supports it, send it and skip otherwise?

Testing is a large barrel of monkeys with a ton of small points, each of which 
is critical in some one. It is hard to talk about generalities when those 
details are very specific.  So, to focus a conversation, which target, which 
host, canadian? Which part of the environment? What part is missing you want to 
fix? Want to unify between targets... and so on.



Re: [PATCH 11/11] riscv: thead: Add support for the XTheadFMemIdx ISA extension

2023-06-10 Thread Jeff Law via Gcc-patches




On 4/28/23 00:23, Christoph Muellner wrote:

From: Christoph Müllner 

The XTheadFMemIdx ISA extension provides additional load and store
instructions for floating-point registers with new addressing modes.

The following memory accesses types are supported:
* ftype = [w,d] (single-precision, double-precision)

The following addressing modes are supported:
* register offset with additional immediate offset (4 instructions):
   flr, fsr
* zero-extended register offset with additional immediate offset
   (4 instructions): flur, fsur

These addressing modes are also part of the similar XTheadMemIdx
ISA extension support, whose code is reused and extended to support
floating-point registers.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_index_reg_class): Also allow
for XTheadFMemIdx.
(riscv_regno_ok_for_index_p): Likewise.
* config/riscv/thead-peephole.md (TARGET_64BIT):
Generalize peepholes for XTheadFMemIdx.
* config/riscv/thead.cc (is_fmemidx_mode): New function.
(th_memidx_classify_address_index): Add support for
XTheadFMemIdx.
(th_fmemidx_output_index): New function.
(th_output_move): Add support for XTheadFMemIdx.
* config/riscv/thead.md (*th_fmemidx_movsf_hardfloat): New INSN.
(*th_fmemidx_movdf_hardfloat_rv64): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/xtheadmemidx-helpers.h: Add helpers for
  XTheadMemFIdx.
* gcc.target/riscv/xtheadfmemidx-index-update.c: New test.
* gcc.target/riscv/xtheadfmemidx-index-xtheadbb-update.c: New test.
* gcc.target/riscv/xtheadfmemidx-index-xtheadbb.c: New test.
* gcc.target/riscv/xtheadfmemidx-index.c: New test.
* gcc.target/riscv/xtheadfmemidx-uindex-update.c: New test.
* gcc.target/riscv/xtheadfmemidx-uindex-xtheadbb-update.c: New test.
* gcc.target/riscv/xtheadfmemidx-uindex-xtheadbb.c: New test.
* gcc.target/riscv/xtheadfmemidx-uindex.c: New test.

Same core questions/comments as in patch #10 of this series.

jeff



Re: [PATCH 10/11] riscv: thead: Add support for the XTheadMemIdx ISA extension

2023-06-10 Thread Jeff Law via Gcc-patches




On 4/28/23 00:23, Christoph Muellner wrote:

From: Christoph Müllner 

The XTheadMemIdx ISA extension provides additional load and store
instructions with new addressing modes.

The following memory accesses types are supported:
* ltype = [b,bu,h,hu,w,wu,d]
* stype = [b,h,w,d]

The following addressing modes are supported:
* immediate offset with PRE_MODIFY or POST_MODIFY (22 instructions):
   l.ia, l.ib, s.ia, s.ib
* register offset with additional immediate offset (11 instructions):
   lr, sr
* zero-extended register offset with additional immediate offset
   (11 instructions): lur, sur

The RISC-V base ISA does not support index registers, so the changes
are kept separate from the RISC-V standard support.

Similar to other extensions (Zbb, XTheadBb), this patch needs to
prevent the conversion of sign-extensions/zero-extensions into
shift instructions. The case of the zero-extended register offset
addressing mode is handled by a new peephole pass.

Handling the different cases of extensions results in a couple of INSNs
that look redundant on first view, but they are just the equivalent
of what we already have for Zbb as well. The only difference is that
we have many more load instructions.

To fully utilize the capabilities of the instructions, there are
a few new peephole passes which fold shift amounts into the RTX
if possible. The added tests ensure that this feature won't
regress without notice.

We already have a constraint with the name 'th_f_fmv', therefore,
the new constraints follow this pattern and have the same length
as required ('th_m_mia', 'th_m_mib', 'th_m_mir', 'th_m_miu').

gcc/ChangeLog:

* config/riscv/constraints.md (th_m_mia): New constraint.
(th_m_mib): Likewise.
(th_m_mir): Likewise.
(th_m_miu): Likewise.
* config/riscv/riscv-protos.h (enum riscv_address_type):
Add new address types ADDRESS_REG_REG, ADDRESS_REG_UREG,
and ADDRESS_REG_WB and their documentation.
(struct riscv_address_info): Add new field 'shift' and
document the field usage for the new address types.
(riscv_valid_base_register_p): New prototype.
(th_memidx_legitimate_modify_p): Likewise.
(th_memidx_legitimate_index_p): Likewise.
(th_classify_address): Likewise.
(th_output_move): Likewise.
(th_print_operand_address): Likewise.
* config/riscv/riscv.cc (riscv_index_reg_class):
Return GR_REGS for XTheadMemIdx.
(riscv_regno_ok_for_index_p): Add support for XTheadMemIdx.
(riscv_classify_address): Call th_classify_address() on top.
(riscv_output_move): Call th_output_move() on top.
(riscv_print_operand_address): Call th_print_operand_address()
on top.
* config/riscv/riscv.h (HAVE_POST_MODIFY_DISP): New macro.
(HAVE_PRE_MODIFY_DISP): Likewise.
* config/riscv/riscv.md (zero_extendqi2): Disable
for XTheadMemIdx.
(*zero_extendqi2_internal): Convert to expand,
create INSN with same name and disable it for XTheadMemIdx.
(extendsidi2): Likewise.
(*extendsidi2_internal): Disable for XTheadMemIdx.
* config/riscv/thead-peephole.md: Add helper peephole passes.
* config/riscv/thead.cc (valid_signed_immediate): New helper
function.
(th_memidx_classify_address_modify): New function.
(th_memidx_legitimate_modify_p): Likewise.
(th_memidx_output_modify): Likewise.
(is_memidx_mode): Likewise.
(th_memidx_classify_address_index): Likewise.
(th_memidx_legitimate_index_p): Likewise.
(th_memidx_output_index): Likewise.
(th_classify_address): Likewise.
(th_output_move): Likewise.
(th_print_operand_address): Likewise.
* config/riscv/thead.md (*th_memidx_mov2):
New INSN.
(*th_memidx_zero_extendqi2): Likewise.
(*th_memidx_extendsidi2): Likewise
(*th_memidx_zero_extendsidi2): Likewise.
(*th_memidx_zero_extendhi2): Likewise.
(*th_memidx_extend2): Likewise
(*th_memidx_bb_zero_extendsidi2): Likewise.
(*th_memidx_bb_zero_extendhi2): Likewise.
(*th_memidx_bb_extendhi2): Likewise.
(*th_memidx_bb_extendqi2): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/xtheadmemidx-helpers.h: New test.
* gcc.target/riscv/xtheadmemidx-index-update.c: New test.
* gcc.target/riscv/xtheadmemidx-index-xtheadbb-update.c: New test.
* gcc.target/riscv/xtheadmemidx-index-xtheadbb.c: New test.
* gcc.target/riscv/xtheadmemidx-index.c: New test.
* gcc.target/riscv/xtheadmemidx-modify-xtheadbb.c: New test.
* gcc.target/riscv/xtheadmemidx-modify.c: New test.
* gcc.target/riscv/xtheadmemidx-uindex-update.c: New test.
* gcc.target/riscv/xtheadmemidx-uindex-xtheadbb-update.c: New test.
* gcc.target/riscv/xtheadmemidx-uindex-xtheadbb.c: New test.
* 

Re: Ping^^: [PATCH V2] extract DF/SF/SI/HI/QI subreg from parameter word on stack

2023-06-10 Thread Jeff Law via Gcc-patches




On 5/10/23 19:20, Jiufu Guo wrote:


Hi,

I would like to ping:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html

We know there are a few issues related to aggregate parameter and
returns.  I'm thinking if it is ok for trunk to use this patch to
resolve part of those issues.
It looks like the patch is focused on emitting a load of the object from 
memory into a GPR, then copying the GPR into pseudo (which hopefully 
gets allocated into an FPR).  That would seem to indicate the value got 
flushed to memory at some point.  Presumably because the type of the 
object is not one that we would typically allow in registers, except for 
some special cases for parameter passing and return values?


If that's the case, then is there any value in finding the flush to the 
stack and just emitting a copy from the GPR into the destination pseudo 
at that point?


Or is it just easier to construct a load from the flushback area and let 
CSE/DCE/DSE clean things up?


Jeff

I


Re: [patch,avr]: Improve bit-extractions as of PR109907.

2023-06-10 Thread Jeff Law via Gcc-patches




On 6/7/23 02:41, Georg-Johann Lay wrote:

Subject:
[patch,avr]: Improve bit-extractions as of PR109907.
From:
Georg-Johann Lay 
Date:
6/7/23, 02:41

To:
gcc-patches@gcc.gnu.org
CC:
Jeff Law , Denis Chertykov 


This patch improves bit-extractions on AVR.

Andrew added some patches so that more bit extractions are
recognized in the middle-end and rtl optimizers.

The patch adds pattern for "extzv<mode>" and replaces the
deprecated "extzv".

There are still situations where expensive shifts are passed
down to the backend though , and in one situation the backend
uses better sequences for right-shift with an offset of MSB:

Instead of ROL/CLR/ROL sequence that needs constraint "0" for
operand $1, BST/CLR/BLD just requires "r" for $1 thus less
register pressure.  Moreover, no scratch is required.

Asm out for (inverted) bit-extraction was out-sourced to a
C function which is more convenient.

Ok for master?

Johann

--

target/109907: Overhaul bit extractions.

o Logical right shift that shifts the MSB to position 0 can be performed in
   such a way that the input operand constraint can be relaxed from "0" 
to "r".
   This results in less register pressure.  Moreover, no scratch 
register is

   required in that case.

o The deprecated "extzv" pattern is replaced by "extzv<mode>" that allows
   inputs of scalar integer modes of different sizes (1 up to 4 bytes).

o Existing patterns are adjusted to the more generic "extzv<mode>" pattern.
   Some patterns are added as the middle-end has been reworked to spot
   more bit-extraction opportunities.

o A C function is used to print the asm for bit extractions, which is more
   convenient for complex output logic.

gcc/
 PR target/109907
 * config/avr/avr.md (adjust_len) [extr, extr_not]: New elements.
 (MSB, SIZE): New mode attributes.
 (any_shift): New code iterator.
 (*lshr3_split, *lshr3, lshr3)
 (*lshr3_const_split): Add constraint alternative for
 the case of shift-offset = MSB.  Ditch "length" attribute.
 (extzv, *extzv..subreg, *extzv.xor)
 (*extzv.ge, *neg.ashiftrt.msb, *extzv.io.lsr7): New.
 * config/avr/constraints.md (C15, C23, C31, Yil): New
 * config/avr/predicates.md (reg_or_low_io_operand)
 (const7_operand, reg_or_low_io_operand)
 (const15_operand, const_0_to_15_operand)
 (const23_operand, const_0_to_23_operand)
 (const31_operand, const_0_to_31_operand): New.
 * config/avr/avr-protos.h (avr_out_extr, avr_out_extr_not): New.
 * config/avr/avr.cc (avr_out_extr, avr_out_extr_not): New funcs.
 (lshrqi3_out, lshrhi3_out, lshrpsi3_out, lshrsi3_out): Adjust
 MSB case to new insn constraint "r" for operands[1].
 (avr_adjust_insn_length) [ADJUST_LEN_EXTR_NOT, ADJUST_LEN_EXTR]:
 Handle these cases.
 (avr_rtx_costs_1): Adjust cost for a new pattern.
gcc/testsuite/
 * gcc.target/avr/pr109907.c: New test.
 * gcc.target/avr/torture/pr109907-1.c: New test.
 * gcc.target/avr/torture/pr109907-2.c: New test.

pr109907-v2.diff

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ec96fd45865..229854a19db 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h



diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index a90cade35c7..f69d79bf14e 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -7142,9 +7142,9 @@ lshrqi3_out (rtx_insn *insn, rtx operands[], int *len)
  
  	case 7:

  *len = 3;
- return ("rol %0" CR_TAB
- "clr %0" CR_TAB
- "rol %0");
+ return ("bst %1,7" CR_TAB
+ "clr %0"   CR_TAB
+ "bld %0,0");
}

[ ... ]
Reminds me a lot of the H8 port.  The basic H8/300 variants can only 
shift a single bit at a time and the H8/S can only shift 2 at a time. 
So we synthesize all kinds of sequences to try and optimize shifts and 
bitfield extractions.



Anyway, much like the other patch, I did a cursory review, but you're 
really in a better position to judge correctness and profitability for 
the AVR bits.  So OK for the trunk.


jeff


Re: [patch,avr] Fix PR109650 wrong code

2023-06-10 Thread Jeff Law via Gcc-patches




On 5/19/23 02:49, Georg-Johann Lay wrote:

Subject:
Re: [patch,avr] Fix PR109650 wrong code
From:
Georg-Johann Lay 
Date:
5/19/23, 02:49

To:
gcc-patches@gcc.gnu.org
CC:
Jeff Law , Denis Chertykov , 
Senthil Kumar Selvaraj 



...Ok, and now with the patch attached...

Here is a revised version of the patch.  The difference to the
previous one is that it adds some combine patterns for *cbranch
insns that were lost in the PR92729 transition.  The post-reload
part of the patterns were still there.  The new patterns are
slightly more general in that they also handle fixed-point modes.

Apart from that, the patch behaves the same:

Am 15.05.23 um 20:05 schrieb Georg-Johann Lay:

This patch fixes a wrong-code bug in the wake of PR92729, the transition
that turned the AVR backend from cc0 to CCmode.  In cc0, the insn that
uses cc0 like a conditional branch always follows the cc0 setter, which
is no more the case with CCmode where set and use of REG_CC might be in
different basic blocks.

This patch removes the machine-dependent reorg pass in avr_reorg 
entirely.


It is replaced by a new, AVR specific mini-pass that runs prior to
split2. Canonicalization of comparisons away from the "difficult"
codes GT[U] and LE[U] is now mostly performed by implementing
TARGET_CANONICALIZE_COMPARISON.

Moreover:

* Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as
needed.

* RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as
needed.

* Conditional branches no more clobber REG_CC.

* insn output for compares looks ahead to determine the branch mode in
use. This needs also "dead_or_set_regno_p (*, REG_CC)".

* Add RTL peepholes for decrement-and-branch detection.

Finally, it fixes some of the many indentation glitches left over from
PR92729.

Ok?

I'd also backport this one because all of v12+ is affected by the 
wrong code.


Johann

--

gcc/
 PR target/109650
 PR target/92729

 * config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass.
 * config/avr/avr.cc (avr_pass_ifelse): New RTL pass.
 (avr_pass_data_ifelse): New pass_data for it.
 (make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost)
 (avr_canonicalize_comparison, avr_out_plus_set_ZN)
 (avr_out_cmp_ext): New functions.
 (compare_condition): Make sure REG_CC dies in the branch insn.
 (avr_rtx_costs_1): Add computation of cbranch costs.
 (avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT]:
 [ADJUST_LEN_CMP_SEXT]Handle them.
 (TARGET_CANONICALIZE_COMPARISON): New define.
 (avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern)
 (avr_reorg_remove_redundant_compare, avr_reorg): Remove functions.
 (TARGET_MACHINE_DEPENDENT_REORG): Remove define.

 * avr-protos.h (avr_simplify_comparison_p): Remove proto.
 (make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx)
 (avr_out_cmp_zext): New Protos

 * config/avr/avr.md (branch, difficult_branch): Don't split insns.
 (*cbranchhi.zero-extend.0", *cbranchhi.zero-extend.1")
 (*swapped_tst, *add.for.eqne.): New insns.
 (*cbranch4): Rename to cbranch4_insn.
 (define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed.
 (define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed.
 Add new RTL peepholes for decrement-and-branch and *swapped_tst.
 Rework signtest-and-branch peepholes for *sbrx_branch.
 (adjust_len) [add_set_ZN, cmp_zext]: New.
 (QIPSI): New mode iterator.
 (ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators.
 (gelt): New code iterator.
 (gelt_eqne): New code attribute.
 (rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch)
 (branch_unspec, *negated_tst, *reversed_tst)
 (*cmpqi_sign_extend): Remove insns.
 (define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove.

 * config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons.
 * config/avr/predicates.md (scratch_or_d_register_operand): New.
 * config/avr/constraints.md (Yxx): New constraint.

gcc/testsuite/
 PR target/109650
 * config/avr/torture/pr109650-1.c: New test.
 * config/avr/torture/pr109650-2.c: New test.
So I did a cursory review and didn't see anything obviously wrong. 
Given we haven't heard from Denis in a while I'll go ahead and ACK.


More importantly how do we want to handle things going forward?  I don't 
think my involvement in avr specific changes would bring any significant 
value and I don't expect to hear from Denis.


I could propose you as the maintainer of the avr port to the steering 
committee, but I wouldn't do that without knowing it's a task you want to 
take on.



Jeff



Re: [PATCH V3] RISC-V: Rework Phase 5 && Phase 6 of VSETVL PASS

2023-06-10 Thread Jeff Law via Gcc-patches




On 6/9/23 17:11, juzhe.zh...@rivai.ai wrote:

From: Juzhe-Zhong 

Address comments from Jeff.

This patch is to rework Phase 5 && Phase 6 of VSETVL PASS since Phase 5 && 
Phase 6
are quite messy and cause some bugs discovered by my downstream 
auto-vectorization
test-generator.

Before this patch.

Phase 5 (cleanup_insns) is the function that removes the AVL operand dependency from 
each RVV instruction.
E.g. vadd.vv (use a5) becomes, after Phase 5, vadd.vv (use const_int 0). Since "a5" is used 
in "vsetvl" instructions and
after the correct "vsetvl" instructions are inserted, each RVV instruction doesn't need 
the AVL operand "a5" anymore. Removing
this operand dependency helps the following scheduling PASS.

Phase 6 is propagate_avl do the following 2 things:
1. Local && Global user vsetvl instructions optimization.
E.g.
   vsetvli a2, a2, e8, mf8   ==> Change it into vsetvli a2, a2, e32, mf2
   vsetvli zero,a2, e32, mf2  ==> eliminate
2. Optimize user vsetvl from "vsetvl a2,a2" into "vsetvl zero,a2" if "a2" is 
not used by any instructions.
Since from Phase 1 ~ Phase 4 which inserts "vsetvli" instructions base on LCM 
which change the CFG, I re-new a new
RTL_SSA framework (which is more expensive than just using DF) for Phase 6 and 
optimize user vsetvli base on the new RTL_SSA.

There are 2 issues in Phase 5 && Phase 6:
1. local_eliminate_vsetvl_insn was introduced by @kito which can do 
local user vsetvl optimizations better than
Phase 6 does; such an approach doesn't need to re-new the RTL_SSA framework. So 
the local user vsetvli instructions optimization
in Phase 6 is redundant and should be removed.
2. A bug discovered by my downstream auto-vectorization test-generator (I can't 
put the test in this patch since we are missing autovec
patterns for it so we can't use the upstream GCC directly reproduce such 
issue but I will remember put it back after I support the
necessary autovec patterns). Such a bug is caused by using the RTL_SSA re-new 
framework. The issue description is this:

Before Phase 6:

...
insn1: vsetvli a3, 17 <== generated by SELECT_VL auto-vec pattern.
slli a4,a3,3
...
insn2: vsetvli zero, a3, ...
load (use const_int 0, before Phase 5, it's using a3, but the use of "a3" 
is removed in Phase 5)
...

In Phase 6, we iterate to insn2, then get the def of "a3" which is the insn1.
insn2 is the vsetvli instruction inserted in Phase 4 which is not included in 
the RTL_SSA framework
even though we renew it (I didn't take a look at it and I don't think we need 
to now).
Base on this situation, the def_info of insn2 has the information 
"set->single_nondebug_insn_use ()"
which returns true. Obviously, this information is not correct, since insn1 has 
at least 2 uses:
1). slli a4,a3,3 2).insn2: vsetvli zero, a3, ... Then, the test generated by my 
downstream test-generator
execution test failed.

Conclusion of RTL_SSA framework:
Before this patch, we initialize RTL_SSA 2 times. One is at the beginning of 
the VSETVL PASS which is absolutely correct, the other
is re-new after Phase 4 (LCM) has incorrect information that causes bugs.

Besides, we don't like to initialize RTL_SSA second time it seems to be a waste 
since we just need to do a little optimization.

Base on all circumstances I described above, I rework and reorganize Phase 5 && 
Phase 6 as follows:
1. Phase 5 is called ssa_post_optimization which is doing the optimization base 
on the RTL_SSA information (The RTL_SSA is initialized
at the beginning of the VSETVL PASS, no need to re-new it again). This 
phase includes 3 optimizations:
1). local_eliminate_vsetvl_insn we already have (no change).
2). global_eliminate_vsetvl_insn ---> new optimization split from 
original Phase 6 but with more powerful and reliable implementation.
   E.g.
   void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
 size_t avl;
 if (m > 100)
   avl = __riscv_vsetvl_e16mf4(vl << 4);
 else
   avl = __riscv_vsetvl_e32mf2(vl >> 8);
 for (size_t i = 0; i < m; i++) {
   vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i, avl);
   v0 = __riscv_vadd_vv_i8mf8 (v0, v0, avl);
   __riscv_vse8_v_i8mf8(out + i, v0, avl);
 }
   }

   This example failed to global user vsetvl optimize before this patch:
   f:
   li  a5,100
   bleua3,a5,.L2
   sllia2,a2,4
   vsetvli a4,a2,e16,mf4,ta,mu
   .L3:
   li  a5,0
   vsetvli zero,a4,e8,mf8,ta,ma
   .L5:
   add a6,a0,a5
   add a2,a1,a5
   vle8.v  v1,0(a6)
   addia5,a5,1
   vadd.vv v1,v1,v1
   vse8.v  v1,0(a2)
   bgtua3,a5,.L5
   .L10:
   ret
   .L2:
   beq a3,zero,.L10
   srlia2,a2,8
   

Re: [patch] Fix PR101188 wrong code from postreload

2023-06-10 Thread Jeff Law via Gcc-patches



On 6/3/23 12:38, Georg-Johann Lay wrote:


    }

  note_stores (insn, move2add_note_store, insn);


The point is that in the continue block, the effect of the insn is
recorded even if !success, it's just the computed effect of the code.

Moreover, "next" is REG = REG + CONST_INT, so there are no REG_INC
notes, no?

Also I don't have any testcases that break other than the one
that has a clobber of a GPR along with the pointer addition.

I tried some "smart" solutions before, but all failed for some
reason, so I resorted to something that fixes the bug, and
*only* fixes the bug, and which has clearly no other side
effects than fixing the bug (I have to do all remote on compile
farm).  If a more elaborate fix is needed that also catches other
PRs, then I would hand this over to a postreload maintainer please.

Of particular importance for your case would be the note_stores call. 
But I could well see other targets needing the search for REG_INC 
notes as well as stack pushes.


If I'm right, then wouldn't it be better to factor that blob of code 
above into its own function, then use it before the "continue" rather 
than implementing a custom scan for CLOBBERS?


I cannot answer that.  Maybe the authors of the code have some ideas.
Here's what I was thinking.  I don't have the bits here to build newlib 
or a simulator, so if you could give it a test it'd be appreciated.


I suspect the note_stores call is the important one in here since as you 
note we're dealing with simple arithmetic and I wouldn't expect to have 
REG_INC notes or SP autoincs in that scenario.


Jeff

diff --git a/gcc/postreload.cc b/gcc/postreload.cc
index 20e138b4fa8..856b7d6e22c 100644
--- a/gcc/postreload.cc
+++ b/gcc/postreload.cc
@@ -1899,6 +1899,79 @@ move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
   return changed;
 }
 
+/* Perform any invalidations necessary for INSN.  */
+
+void
+reload_cse_move2add_invalidate (rtx_insn *insn)
+{
+  for (rtx note = REG_NOTES (insn); note; note = XEXP (note, 1))
+{
+  if (REG_NOTE_KIND (note) == REG_INC
+	  && REG_P (XEXP (note, 0)))
+	{
+	  /* Reset the information about this register.  */
+	  int regno = REGNO (XEXP (note, 0));
+	  if (regno < FIRST_PSEUDO_REGISTER)
+	{
+	  move2add_record_mode (XEXP (note, 0));
+	  reg_mode[regno] = VOIDmode;
+	}
+	}
+}
+
+  /* There are no REG_INC notes for SP autoinc.  */
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
+{
+  rtx mem = *iter;
+  if (mem
+	  && MEM_P (mem)
+	  && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
+	{
+	  if (XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx)
+	reg_mode[STACK_POINTER_REGNUM] = VOIDmode;
+	}
+}
+
+  note_stores (insn, move2add_note_store, insn);
+
+  /* If INSN is a conditional branch, we try to extract an
+ implicit set out of it.  */
+  if (any_condjump_p (insn))
+{
+  rtx cnd = fis_get_condition (insn);
+
+  if (cnd != NULL_RTX
+	  && GET_CODE (cnd) == NE
+	  && REG_P (XEXP (cnd, 0))
+	  && !reg_set_p (XEXP (cnd, 0), insn)
+	  /* The following two checks, which are also in
+	 move2add_note_store, are intended to reduce the
+	 number of calls to gen_rtx_SET to avoid memory
+	 allocation if possible.  */
+	  && SCALAR_INT_MODE_P (GET_MODE (XEXP (cnd, 0)))
+	  && REG_NREGS (XEXP (cnd, 0)) == 1
+	  && CONST_INT_P (XEXP (cnd, 1)))
+	{
+	  rtx implicit_set = gen_rtx_SET (XEXP (cnd, 0), XEXP (cnd, 1));
+	  move2add_note_store (SET_DEST (implicit_set), implicit_set, insn);
+	}
+}
+
+  /* If this is a CALL_INSN, all call used registers are stored with
+ unknown values.  */
+  if (CALL_P (insn))
+{
+  function_abi callee_abi = insn_callee_abi (insn);
+  for (int i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
+	if (reg_mode[i] != VOIDmode
+	&& reg_mode[i] != BLKmode
+	&& callee_abi.clobbers_reg_p (reg_mode[i], i))
+	/* Reset the information about this register.  */
+	  reg_mode[i] = VOIDmode;
+}
+}
+
 /* Convert move insns with constant inputs to additions if they are cheaper.
Return true if any changes were made.  */
 static bool
@@ -1921,7 +1994,7 @@ reload_cse_move2add (rtx_insn *first)
   move2add_luid = 2;
   for (insn = first; insn; insn = NEXT_INSN (insn), move2add_luid++)
 {
-  rtx set, note;
+  rtx set;
 
   if (LABEL_P (insn))
 	{
@@ -2041,6 +2114,12 @@ reload_cse_move2add (rtx_insn *first)
 			delete_insn (insn);
 		  changed |= success;
 		  insn = next;
+		  /* Make sure to perform any invalidations related to
+			 NEXT/INSN since we're going to bypass the normal
+			 flow with the continue below.
+
+			 Do this before recording the new mode/offset.  */
+		  reload_cse_move2add_invalidate (insn);
 		  move2add_record_mode (reg);
 		  reg_offset[regno]
 			= trunc_int_for_mode (added_offset + base_offset,
@@ -2094,74 +2173,7 @@ reload_cse_move2add (rtx_insn *first)
 	

Re: [PATCH 1/2] Implementation of new RISCV optimizations pass: fold-mem-offsets.

2023-06-10 Thread Jeff Law via Gcc-patches




On 5/25/23 06:35, Manolis Tsamis wrote:

Implementation of the new RISC-V optimization pass for memory offset
calculations, documentation and testcases.

gcc/ChangeLog:

* config.gcc: Add riscv-fold-mem-offsets.o to extra_objs.
* config/riscv/riscv-passes.def (INSERT_PASS_AFTER): Schedule a new
pass.
* config/riscv/riscv-protos.h (make_pass_fold_mem_offsets): Declare.
* config/riscv/riscv.opt: New options.
* config/riscv/t-riscv: New build rule.
* doc/invoke.texi: Document new option.
* config/riscv/riscv-fold-mem-offsets.cc: New file.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/fold-mem-offsets-1.c: New test.
* gcc.target/riscv/fold-mem-offsets-2.c: New test.
* gcc.target/riscv/fold-mem-offsets-3.c: New test.


So I made a small number of changes so that this could be run on other 
targets.



I had an hppa compiler handy, so it was trivial to do some light testing 
with that.  f-m-o didn't help at all on the included tests.  But I think 
that's more likely an artifact of the port supporting scaled indexed 
loads and doing fairly aggressive address rewriting to encourage that 
addressing mode.


Next I had an H8 compiler handy.  All three included tests showed 
improvement, both in terms of instruction count and size.  What was most 
interesting here is that f-m-o removed some redundant address 
calculations without needing to adjust the memory references which was a 
pleasant surprise.


Given the fact that both ports worked and the H8 showed an improvement, 
the next step was to put the patch into my tester.  It tests 30+ 
distinct processor families.  The goal wasn't to evaluate effectiveness, 
but to validate that those targets could still build their target 
libraries and successfully run their testsuites.


That's run through the various crosses.  Things like the hppa, alpha, 
m68k bootstraps only run once a week as they take many hours each.  The 
result is quite encouraging.  None of the crosses had any build issues 
or regressions.


The net result I think is we should probably move this to a target 
independent optimization pass.  We only need to generalize a few things.


Most importantly we need to get a resolution on the conditional I asked 
about inside get_single_def_in_bb.   There's some other refactoring I 
think we should do, but I'd really like to get a resolution on the code 
in get_single_def_in_bb first, then we ought to be able to move forward 
pretty quickly on the refactoring and integration.


jeff


Re: [PATCH] Fortran: add Fortran 2018 IEEE_{MIN,MAX} functions

2023-06-10 Thread FX Coudert via Gcc-patches
Given the agreement that the patch is not making things for powerpc worse, and 
the review by Steve, I have committed as 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17bccd1d2c0fa1f08e0483c8ed841994a95febb0

Best,
FX

Re: [PATCH] Fortran: add Fortran 2018 IEEE_{MIN,MAX} functions

2023-06-10 Thread FX Coudert via Gcc-patches
Hi Thomas,

> The KIND=17 is a bit of a kludge.  It is not visible for
> user programs, they use KIND=16, but this is then translated
> to library calls as if it was KIND=17 if the IEEE 128-bit floats
> are selected

Can you check what the IEEE test results are when -mabi=ieeelongdouble is 
enabled?
It’s not even clear to me what the IEEE kinds selected should be, in this case, 
depending on -mabi=ieeelongdouble


> Regarding FX's patch: I am not quite sure that I am
> actually testing the right thing if running the testsuite
> there, so POWER should not hold up this patch.  If it turns
> out that POWER needs additional work on IEEE, we can always
> add that later.

Actually, it sounds like the situation is: the same target can have two ABIs 
based on a compile-time flag. That sounds like a job for multilib, i.e., we 
should compile libgfortran twice, one for each ABI. I am sure this was 
considered and rejected, do you remember what was the rationale?

Thanks,
FX

Re: [PATCH] Fortran: add IEEE_QUIET_* and IEEE_SIGNALING_* comparisons

2023-06-10 Thread FX Coudert via Gcc-patches
Hi Harald,

> I just looked at that thread.  I guess if you answer Mikael's
> questions at
>  https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601744.html
> the patch will be fine.

Amended patch, adding the required testing of signalling vs. quiet behaviour.
I still need to get an OK on the middle-end part first, but I consider the 
Fortran part approved.

Thanks,
FX



0001-Add-__builtin_iseqsig.patch
Description: Binary data


0002-Fortran-add-IEEE_QUIET_-and-IEEE_SIGNALING_-comparis.patch
Description: Binary data


Re: [RFC] Add stdckdint.h header for C23

2023-06-10 Thread Jakub Jelinek via Gcc-patches
On Sat, Jun 10, 2023 at 12:37:40PM +0200, Jakub Jelinek via Gcc-patches wrote:
> I think changing __builtin_classify_type behavior after 35 years
> would be dangerous, shall we introduce a new similar builtin which
> would just never promote the argument/perform array/function/enum
> conversions on it, so that
> __builtin_type_classify (true) == boolean_type_class
> enum E { E1, E2 } e;
> __builtin_type_classify (e) == enumeral_type_class
> int a[2];
> __builtin_type_classify (a) == array_type_class
> etc.?
> Seems clang changed __builtin_classify_type at some point
> so that it e.g. returns enumeral_type_class for enum arguments
> and array_type_class for arrays, but doesn't return boolean_type_class
> for _Bool argument.

Another option would be just extend the current __builtin_classify_type
to be more sizeof like, that the argument could be either expression with
current behavior, or type, and so one could use
__builtin_classify_type (int)
or
__builtin_classify_type (0)
or
__builtin_classify_type (typeof (expr))
and the last way would ensure no argument promotion happens.

Jakub



[RFC] Add stdckdint.h header for C23

2023-06-10 Thread Jakub Jelinek via Gcc-patches
Hi!

The following patch is an attempt to implement the C23 stdckdint.h
header on top of our GNU extension - __builtin_{add,sub,mul}_overflow
builtins.

I have looked at gnulib stdckdint.h and they are full of workarounds
for various compilers, EDG doesn't do this, clang <= 14 can't multiply
__int128, ..., so I think the header belongs into the compiler rather
than C library, because it would be a nightmare to maintain it there.

What I'm struggling with is enforcing the weird restrictions
C23 imposes on these.

The builtins error on the result pointer not being writable, or
having boolean or enumeral type (the reason for disallowing bool
was that it would be questionable whether it should act as if
storing to an unsigned 1-bit precision type which would overflow
if result is not in [0,1] or whether it would never overflow
for bool * result and simply store false if the infinite precision
result is 0 and true otherwise, and for enums because of the
uncertainties on just the enumerators vs. range from smallest to
largest enumerator vs. strict enum precision with underlying type).
They do allow storing result in plain char.  And the source operands
can have any integral types, including plain char, including booleans
and including enumeral types.  The plan is to allow even _BitInt(N)
as both source and result later on.

Now, C23 says that suitable types for both type2/type3 and type1
are integral types other than plain char, bool, a bit-precise integer type,
or an enumerated type.

And it also says:
It is recommended to produce a diagnostic message if type2 or type3 are
not suitable integer types, or if *result is not a modifiable lvalue of
a suitable integer type.

I've tried to first check it with:
  static_assert (_Generic ((a), char: 0, const char: 0, volatile char: 0, const 
volatile char: 0,
   default: __builtin_classify_type (a) - 1 <= 1U),
 "...")
but that only catches plain char and doesn't catch _Bool/bool and
doesn't catch enumerated types (note, for the *result we diagnose
it for the builtins, but not for the other args), because
__builtin_classify_type sadly promotes its argument.

The _Generic in the patch below is slightly better, it catches
also _Bool/bool, but doesn't catch enumerated types, comptypes
used by _Generic says enumeral type is compatible with the underlying
integer type.  But catching just plain char and bool would be
also doable with just _Generic listing the non-allowed types.

I think changing __builtin_classify_type behavior after 35 years
would be dangerous, shall we introduce a new similar builtin which
would just never promote the argument/perform array/function/enum
conversions on it, so that
__builtin_type_classify (true) == boolean_type_class
enum E { E1, E2 } e;
__builtin_type_classify (e) == enumeral_type_class
int a[2];
__builtin_type_classify (a) == array_type_class
etc.?
Seems clang changed __builtin_classify_type at some point
so that it e.g. returns enumeral_type_class for enum arguments
and array_type_class for arrays, but doesn't return boolean_type_class
for _Bool argument.

Also, shall we introduce __typeof_unqual__ keyword which could be used in
< C23 modes and perhaps C++?

2023-06-10  Jakub Jelinek  

* Makefile.in (USER_H): Add stdckdint.h.
* ginclude/stdckdint.h: New file.

* gcc.dg/stdckdint-1.c: New test.
* gcc.dg/stdckdint-2.c: New test.

--- gcc/Makefile.in.jj  2023-06-06 20:02:35.581211930 +0200
+++ gcc/Makefile.in 2023-06-10 10:17:05.062270115 +0200
@@ -466,6 +466,7 @@ USER_H = $(srcdir)/ginclude/float.h \
 $(srcdir)/ginclude/stdnoreturn.h \
 $(srcdir)/ginclude/stdalign.h \
 $(srcdir)/ginclude/stdatomic.h \
+$(srcdir)/ginclude/stdckdint.h \
 $(EXTRA_HEADERS)
 
 USER_H_INC_NEXT_PRE = @user_headers_inc_next_pre@
--- gcc/ginclude/stdckdint.h.jj 2023-06-10 09:20:39.154053338 +0200
+++ gcc/ginclude/stdckdint.h2023-06-10 12:02:33.454947780 +0200
@@ -0,0 +1,78 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* ISO C23: 

Re: [PATCH] Convert ipcp_vr_lattice to type agnostic framework.

2023-06-10 Thread Martin Jambor
Hi,

thanks for dealing with my requests.

On Wed, Jun 07 2023, Aldy Hernandez wrote:
> On 5/26/23 18:17, Martin Jambor wrote:
>> Hello,
>> 
>> On Mon, May 22 2023, Aldy Hernandez wrote:
>>> I've adjusted the patch with some minor cleanups that came up when I
>>> implemented the rest of the IPA revamp.
>>>
>>> Retested.  OK?
>>>
>>> On Wed, May 17, 2023 at 4:31 PM Aldy Hernandez  wrote:

 This converts the lattice to store ranges in Value_Range instead of
 value_range (*) to make it type agnostic, and adjust all users
 accordingly.

 I think it is a good example on converting from static ranges to more
 general, type agnostic ones.

 I've been careful to make sure Value_Range never ends up on GC, since
 it contains an int_range_max and can expand on-demand onto the heap.
 Longer term storage for ranges should be done with vrange_storage, as
 per the previous patch ("Provide an API for ipa_vr").

 (*) I do know the Value_Range naming versus value_range is quite
 annoying, but it was a judgement call last release for the eventual
 migration to having "value_range" be a type agnostic range object.  We
 will ultimately rename Value_Range to value_range.

[...]

 diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
 index d4b9d4ac27e..bd5b1da17b2 100644
 --- a/gcc/ipa-cp.cc
 +++ b/gcc/ipa-cp.cc
 @@ -343,20 +343,29 @@ private:
   class ipcp_vr_lattice
   {
   public:
 -  value_range m_vr;
 +  Value_Range m_vr;

 inline bool bottom_p () const;
 inline bool top_p () const;
 -  inline bool set_to_bottom ();
 -  bool meet_with (const value_range *p_vr);
 +  inline bool set_to_bottom (tree type);
>> 
>> Requiring a type when setting a lattice to bottom makes for a weird
>> interface, can't we set the underlying Value_Range to whatever... >
 +  bool meet_with (const vrange _vr);
 bool meet_with (const ipcp_vr_lattice );
 -  void init () { gcc_assert (m_vr.undefined_p ()); }
 +  void init (tree type);
 void print (FILE * f);

   private:
 -  bool meet_with_1 (const value_range *other_vr);
 +  bool meet_with_1 (const vrange _vr);
   };

 +inline void
 +ipcp_vr_lattice::init (tree type)
 +{
 +  if (type)
 +m_vr.set_type (type);
 +
 +  // Otherwise m_vr will default to unsupported_range.
>> 
>> ...this does?
>> 
>> All users of the lattice check it for not being bottom first, so it
>> should be safe.
>> 
>> If it is not possible for some reason, then I guess we should add a bool
>> flag to ipcp_vr_lattice instead, rather than looking up types of
>> unusable lattices.  ipcp_vr_lattices don't live for long.
>
> The type was my least favorite part of this work.  And yes, your 
> suggestion would work.  I have tweaked the patch to force a VARYING for 
> an unsupported range which seems to do the trick.  It looks much 
> cleaner.  Thanks.

This version is much better indeed.

[...]

 @@ -1912,29 +1917,33 @@ ipa_vr_operation_and_type_effects (value_range 
 *dst_vr,
   return false;

 range_op_handler handler (operation, dst_type);
 -  return (handler
 - && handler.fold_range (*dst_vr, dst_type,
 -*src_vr, value_range (dst_type))
 - && !dst_vr->varying_p ()
 - && !dst_vr->undefined_p ());
 +  if (!handler)
 +return false;
 +
 +  Value_Range varying (dst_type);
 +  varying.set_varying (dst_type);
 +
 +  return (handler.fold_range (dst_vr, dst_type, src_vr, varying)
 + && !dst_vr.varying_p ()
 + && !dst_vr.undefined_p ());
   }

   /* Determine value_range of JFUNC given that INFO describes the caller 
 node or
  the one it is inlined to, CS is the call graph edge corresponding to 
 JFUNC
  and PARM_TYPE of the parameter.  */

 -value_range
 -ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
 +void
 +ipa_value_range_from_jfunc (vrange ,
 +   ipa_node_params *info, cgraph_edge *cs,
  ipa_jump_func *jfunc, tree parm_type)
>> 
>> I assume that you decided to return the value in a parameter passed by
>> reference instead of in return value for a good reason but then can we
>> at least...
>
> vrange is an abstract type, plus it can be any size (int_range<3> has 3 
> sub-ranges, legacy value_range has 2 sub-ranges, frange is a totally 
> different object, etc).  Throughout all of ranger, returning a range is 
> done by passing by reference.  This has the added benefit that sometimes 
> we can set a return range by twiddling a few bits (foo.set_undefined()) 
> instead of having to copy a full range back and forth.
>

I see, thanks.

[...]

>
> How's this?

One minor observation below...

>
> Aldy
> From 

Re: [PATCH] (Re: Splitting up 27_io/basic_istream/ignore/wchar_t/94749.cc (takes too long))

2023-06-10 Thread Jonathan Wakely via Gcc-patches
On Sat, 10 Jun 2023, 06:18 Hans-Peter Nilsson via Libstdc++, <
libstd...@gcc.gnu.org> wrote:

> Thank you for your consideration.  (Or is that phrase only used
> negatively?)
>
> > From: Jonathan Wakely 
> > Date: Fri, 9 Jun 2023 21:40:15 +0100
>
> > test01, test02, test03 and test04 should run almost instantly. On my
> system
> > they take about 5 microseconds each. So I don't think splitting those up
> > will help.
>
> Right.
>
> > I thought it would help to avoid re-allocating the buffer and zeroing it
> > again. If we reuse the same buffer, then we just have to loop until we
> > overflow the 32-bit counter. That would make the whole test run much
> > faster, which would reduce the total time for a testsuite run. Splitting
> > the file up into smaller files would not decrease the total time, only
> > decrease the time for that single test so it doesn't time out.
> >
> > I've attached a patch that does that. It makes very little difference for
> > me, probably because allocating zero-filled pages isn't actually
> expensive
> > on linux. Maybe it will make a difference for your simulator though?
>
> Nope, just some five seconds down (from about 10min 21s).
>

Bah, worth a try :)


> > You could also try reducing the size of the buffer:
> > +#ifdef SIMULATOR_TEST
> > +  static const streamsize bufsz = 16 << limits::digits10;
> > +#else
> >   static const streamsize bufsz = 2048 << limits::digits10;
> > +#endif
>
> Was that supposed to be with or without the patch?  Anyway;
> both: 606s.  Only smaller bufsz: 614s.  (All numbers subject
> to usual system jitter.)
>
> > test06 is the really slow part, that takes 10+ seconds for me. But that
> > entire function should already be skipped for simulators.
>
> Yep, we may have been here before...  I certainly get a
> deja-vu feeling here, but visiting old email conversations
> of ours, it seems I easily conflate several similar ones.
> I see that here, test06 was always #ifndef SIMULATOR_TEST.
>
> > We can probably skip test05 for simulators too, none of the code it tests
> > is platform-specific, so as long as it's being tested on x86 we don't
> > really need to test it on cris-elf too.
>
> Thanks.  Let's do that, then.  The similar s/wchar_t/char/
> test clocks in at "only" 3m30s, but I suggest treating it
> the same, if nothing else than for symmetry.
>
> Ok as below?
>

OK for trunk, and all branches you care about.



> -- >8 --
> Subject: [PATCH] testsuite: Cut down 27_io/basic_istream/.../94749.cc for
>  simulators
>
> The test wchar_t/94749.cc can take about 10 minutes on some
> simulator/host combinations with char/94749.cc at a third of
> that time.  The cause is test05 which is quite heavy and
> includes wrapping a 32-bit counter.  Run it only for native
> setups.
>
> * testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc (main)
> [! SIMULATOR_TEST]: Also exclude running test05.
> * testsuite/27_io/basic_istream/ignore/char/94749.cc: Ditto.
> ---
>  libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc | 2 +-
>  .../testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc   | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git
> a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc
> b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc
> index 6416863983b7..9160995c05ec 100644
> --- a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc
> +++ b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc
> @@ -221,8 +221,8 @@ main()
>test02();
>test03();
>test04();
> -  test05();
>  #ifndef SIMULATOR_TEST
> +  test05();
>test06();
>  #endif
>  }
> diff --git
> a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc
> b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc
> index 65e0a326c109..a5b9eb71a389 100644
> --- a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc
> +++ b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc
> @@ -221,8 +221,8 @@ main()
>test02();
>test03();
>test04();
> -  test05();
>  #ifndef SIMULATOR_TEST
> +  test05();
>test06();
>  #endif
>  }
> --
> 2.30.2
>
>